Track all sampled textures in GrOpsTask and pass them to GrOpsRenderPass.

In Vulkan we use this list to set the layout for these surfaces to be
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL at GrOpsRenderPass creation
instead of at submit. This gets us closer to being able to run with
primary or secondary command buffers.

Change-Id: I6e307485987e2c024ed9ecba3e41f588047c5f07
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/238444
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 3e7e735..07c035d 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -407,7 +407,7 @@
 
 bool GrGpu::writePixels(GrSurface* surface, int left, int top, int width, int height,
                         GrColorType surfaceColorType, GrColorType srcColorType,
-                        const GrMipLevel texels[], int mipLevelCount) {
+                        const GrMipLevel texels[], int mipLevelCount, bool prepForTexSampling) {
     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
     SkASSERT(surface);
     SkASSERT(this->caps()->isFormatTexturableAndUploadable(surfaceColorType,
@@ -438,7 +438,7 @@
 
     this->handleDirtyContext();
     if (this->onWritePixels(surface, left, top, width, height, surfaceColorType, srcColorType,
-                            texels, mipLevelCount)) {
+                            texels, mipLevelCount, prepForTexSampling)) {
         SkIRect rect = SkIRect::MakeXYWH(left, top, width, height);
         this->didWriteToSurface(surface, kTopLeft_GrSurfaceOrigin, &rect, mipLevelCount);
         fStats.incTextureUploads();
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index a221cc7..e4ebed3 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -210,17 +210,24 @@
     /**
      * Updates the pixels in a rectangle of a surface.  No sRGB/linear conversions are performed.
      *
-     * @param surface           The surface to write to.
-     * @param left              left edge of the rectangle to write (inclusive)
-     * @param top               top edge of the rectangle to write (inclusive)
-     * @param width             width of rectangle to write in pixels.
-     * @param height            height of rectangle to write in pixels.
-     * @param surfaceColorType  the color type for this use of the surface.
-     * @param srcColorType      the color type of the source buffer.
-     * @param texels            array of mipmap levels containing texture data. Row bytes must be a
-     *                          multiple of srcColorType's bytes-per-pixel. Must be tight to level
-     *                          width if !caps->writePixelsRowBytesSupport().
-     * @param mipLevelCount     number of levels in 'texels'
+     * @param surface            The surface to write to.
+     * @param left               left edge of the rectangle to write (inclusive)
+     * @param top                top edge of the rectangle to write (inclusive)
+     * @param width              width of rectangle to write in pixels.
+     * @param height             height of rectangle to write in pixels.
+     * @param surfaceColorType   the color type for this use of the surface.
+     * @param srcColorType       the color type of the source buffer.
+     * @param texels             array of mipmap levels containing texture data. Row bytes must be a
+     *                           multiple of srcColorType's bytes-per-pixel. Must be tight to level
+     *                           width if !caps->writePixelsRowBytesSupport().
+     * @param mipLevelCount      number of levels in 'texels'
+     * @param prepForTexSampling Whether the surface should be prepared for texture sampling after
+     *                           the write. This is currently only used by Vulkan for inline uploads
+     *                           to set the layout back to sampled after doing the upload. Inline
+     *                           uploads currently can happen between draws in a single op so it is
+     *                           not trivial to break up the GrOpsTask into two tasks when we see
+     *                           an inline upload. However, once we are able to support doing that
+     *                           we can remove this parameter.
      *
      * @return true if the write succeeded, false if not. The read can fail
      *              because of the surface doesn't support writing (e.g. read only),
@@ -229,17 +236,17 @@
      */
     bool writePixels(GrSurface* surface, int left, int top, int width, int height,
                      GrColorType surfaceColorType, GrColorType srcColorType,
-                     const GrMipLevel texels[], int mipLevelCount);
+                     const GrMipLevel texels[], int mipLevelCount, bool prepForTexSampling = false);
 
     /**
      * Helper for the case of a single level.
      */
     bool writePixels(GrSurface* surface, int left, int top, int width, int height,
                      GrColorType surfaceColorType, GrColorType srcColorType, const void* buffer,
-                     size_t rowBytes) {
+                     size_t rowBytes, bool prepForTexSampling = false) {
         GrMipLevel mipLevel = {buffer, rowBytes};
         return this->writePixels(surface, left, top, width, height, surfaceColorType, srcColorType,
-                                 &mipLevel, 1);
+                                 &mipLevel, 1, prepForTexSampling);
     }
 
     /**
@@ -315,7 +322,8 @@
     virtual GrOpsRenderPass* getOpsRenderPass(
             GrRenderTarget* renderTarget, GrSurfaceOrigin, const SkRect& bounds,
             const GrOpsRenderPass::LoadAndStoreInfo&,
-            const GrOpsRenderPass::StencilLoadAndStoreInfo&) = 0;
+            const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) = 0;
 
     // Called by GrDrawingManager when flushing.
     // Provides a hook for post-flush actions (e.g. Vulkan command buffer submits). This will also
@@ -573,7 +581,8 @@
     // overridden by backend-specific derived class to perform the surface write
     virtual bool onWritePixels(GrSurface*, int left, int top, int width, int height,
                                GrColorType surfaceColorType, GrColorType srcColorType,
-                               const GrMipLevel texels[], int mipLevelCount) = 0;
+                               const GrMipLevel texels[], int mipLevelCount,
+                               bool prepForTexSampling) = 0;
 
     // overridden by backend-specific derived class to perform the texture transfer
     virtual bool onTransferPixelsTo(GrTexture*, int left, int top, int width, int height,
diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp
index ba74b3e..586a1df 100644
--- a/src/gpu/GrOpFlushState.cpp
+++ b/src/gpu/GrOpFlushState.cpp
@@ -83,10 +83,11 @@
     fBaseDrawToken = GrDeferredUploadToken::AlreadyFlushedToken();
 }
 
-void GrOpFlushState::doUpload(GrDeferredTextureUploadFn& upload) {
-    GrDeferredTextureUploadWritePixelsFn wp = [this](GrTextureProxy* dstProxy, int left, int top,
-                                                     int width, int height, GrColorType colorType,
-                                                     const void* buffer, size_t rowBytes) {
+void GrOpFlushState::doUpload(GrDeferredTextureUploadFn& upload,
+                              bool shouldPrepareSurfaceForSampling) {
+    GrDeferredTextureUploadWritePixelsFn wp = [this, shouldPrepareSurfaceForSampling](
+            GrTextureProxy* dstProxy, int left, int top, int width, int height,
+            GrColorType colorType, const void* buffer, size_t rowBytes) {
         GrSurface* dstSurface = dstProxy->peekSurface();
         if (!fGpu->caps()->surfaceSupportsWritePixels(dstSurface)) {
             return false;
@@ -111,7 +112,8 @@
             buffer = tmpPixels.get();
         }
         return this->fGpu->writePixels(dstSurface, left, top, width, height, colorType,
-                                       supportedWrite.fColorType, buffer, rowBytes);
+                                       supportedWrite.fColorType, buffer, rowBytes,
+                                       shouldPrepareSurfaceForSampling);
     };
     upload(wp);
 }
diff --git a/src/gpu/GrOpFlushState.h b/src/gpu/GrOpFlushState.h
index aa65733..ed8a39c 100644
--- a/src/gpu/GrOpFlushState.h
+++ b/src/gpu/GrOpFlushState.h
@@ -36,7 +36,11 @@
         executed. */
     void preExecuteDraws();
 
-    void doUpload(GrDeferredTextureUploadFn&);
+    /** Called to upload data to a texture using the GrDeferredTextureUploadFn. If the uploaded
+        surface needs to be prepared for being sampled in a draw after the upload, the caller
+        should pass in true for shouldPrepareSurfaceForSampling. This feature is needed for Vulkan
+        when doing inline uploads to reset the image layout back to sampled. */
+    void doUpload(GrDeferredTextureUploadFn&, bool shouldPrepareSurfaceForSampling = false);
 
     /** Called as ops are executed. Must be called in the same order as the ops were prepared. */
     void executeDrawsAndUploadsForMeshDrawOp(
@@ -72,6 +76,14 @@
         return *fOpArgs;
     }
 
+    void setSampledProxyArray(SkTArray<GrTextureProxy*, true>* sampledProxies) {
+        fSampledProxies = sampledProxies;
+    }
+
+    SkTArray<GrTextureProxy*, true>* sampledProxyArray() override {
+        return fSampledProxies;
+    }
+
     /** Overrides of GrDeferredUploadTarget. */
 
     const GrTokenTracker* tokenTracker() final { return fTokenTracker; }
@@ -152,6 +164,10 @@
     // an op is not currently preparing of executing.
     OpArgs* fOpArgs = nullptr;
 
+    // This field is only transiently set during flush. Each GrOpsTask will set it to point to an
+    // array of proxies it uses before calling onPrepare and onExecute.
+    SkTArray<GrTextureProxy*, true>* fSampledProxies = nullptr;
+
     GrGpu* fGpu;
     GrResourceProvider* fResourceProvider;
     GrTokenTracker* fTokenTracker;
diff --git a/src/gpu/GrOpsTask.cpp b/src/gpu/GrOpsTask.cpp
index 69f5780..86bcf9b 100644
--- a/src/gpu/GrOpsTask.cpp
+++ b/src/gpu/GrOpsTask.cpp
@@ -384,6 +384,7 @@
 
     fTarget.reset();
     fDeferredProxies.reset();
+    fSampledProxies.reset();
     fAuditTrail = nullptr;
 }
 
@@ -394,6 +395,7 @@
     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
 #endif
 
+    flushState->setSampledProxyArray(&fSampledProxies);
     // Loop over the ops that haven't yet been prepared.
     for (const auto& chain : fOpChains) {
         if (chain.shouldExecute()) {
@@ -412,15 +414,13 @@
             flushState->setOpArgs(nullptr);
         }
     }
+    flushState->setSampledProxyArray(nullptr);
 }
 
-static GrOpsRenderPass* create_command_buffer(GrGpu* gpu,
-                                              GrRenderTarget* rt,
-                                              GrSurfaceOrigin origin,
-                                              const SkRect& bounds,
-                                              GrLoadOp colorLoadOp,
-                                              const SkPMColor4f& loadClearColor,
-                                              GrLoadOp stencilLoadOp) {
+static GrOpsRenderPass* create_command_buffer(
+        GrGpu* gpu, GrRenderTarget* rt, GrSurfaceOrigin origin, const SkRect& bounds,
+        GrLoadOp colorLoadOp, const SkPMColor4f& loadClearColor, GrLoadOp stencilLoadOp,
+        const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     const GrOpsRenderPass::LoadAndStoreInfo kColorLoadStoreInfo {
         colorLoadOp,
         GrStoreOp::kStore,
@@ -437,7 +437,8 @@
         GrStoreOp::kStore,
     };
 
-    return gpu->getOpsRenderPass(rt, origin, bounds, kColorLoadStoreInfo, stencilLoadAndStoreInfo);
+    return gpu->getOpsRenderPass(rt, origin, bounds, kColorLoadStoreInfo, stencilLoadAndStoreInfo,
+                                 sampledProxies);
 }
 
 // TODO: this is where GrOp::renderTarget is used (which is fine since it
@@ -466,7 +467,8 @@
                                                     fTarget->getBoundsRect(),
                                                     fColorLoadOp,
                                                     fLoadClearColor,
-                                                    fStencilLoadOp);
+                                                    fStencilLoadOp,
+                                                    fSampledProxies);
     flushState->setOpsRenderPass(renderPass);
     renderPass->begin();
 
@@ -520,6 +522,7 @@
     if (CanDiscardPreviousOps::kYes == canDiscardPreviousOps || this->isEmpty()) {
         this->deleteOps();
         fDeferredProxies.reset();
+        fSampledProxies.reset();
 
         // If the opsTask is using a render target which wraps a vulkan command buffer, we can't do
         // a clear load since we cannot change the render pass that we are using. Thus we fall back
diff --git a/src/gpu/GrOpsTask.h b/src/gpu/GrOpsTask.h
index b005671..e2f2682 100644
--- a/src/gpu/GrOpsTask.h
+++ b/src/gpu/GrOpsTask.h
@@ -57,6 +57,10 @@
     void onPrepare(GrOpFlushState* flushState) override;
     bool onExecute(GrOpFlushState* flushState) override;
 
+    void addSampledTexture(GrTextureProxy* proxy) {
+        fSampledProxies.push_back(proxy);
+    }
+
     void addOp(std::unique_ptr<GrOp> op, GrTextureResolveManager textureResolveManager,
                const GrCaps& caps) {
         auto addDependency = [ textureResolveManager, &caps, this ] (
@@ -80,12 +84,14 @@
                    GrTextureResolveManager textureResolveManager, const GrCaps& caps) {
         auto addDependency = [ textureResolveManager, &caps, this ] (
                 GrTextureProxy* p, GrMipMapped mipmapped) {
+            this->addSampledTexture(p);
             this->addDependency(p, mipmapped, textureResolveManager, caps);
         };
 
         op->visitProxies(addDependency);
         clip.visitProxies(addDependency);
         if (dstProxy.proxy()) {
+            this->addSampledTexture(dstProxy.proxy());
             addDependency(dstProxy.proxy(), GrMipMapped::kNo);
         }
 
@@ -261,6 +267,9 @@
     SkArenaAlloc fClipAllocator{4096};
     SkDEBUGCODE(int fNumClips;)
 
+    // TODO: We could look into this being a set if we find we're adding enough duplicates that
+    // they cause slowdowns.
+    SkTArray<GrTextureProxy*, true> fSampledProxies;
 };
 
 #endif
diff --git a/src/gpu/ccpr/GrCCDrawPathsOp.cpp b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
index 4ad6cd9..1d7642e 100644
--- a/src/gpu/ccpr/GrCCDrawPathsOp.cpp
+++ b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
@@ -414,6 +414,15 @@
     SkASSERT(fInstanceRanges.back().fAtlasProxy == atlasProxy);
 }
 
+void GrCCDrawPathsOp::onPrepare(GrOpFlushState* flushState) {
+    // The CCPR ops don't know their atlas textures until after the preFlush calls have been
+    // executed at the start of GrDrawingManager::flush. Thus the proxies are not added during the
+    // normal visitProxies calls during addDrawOp. Therefore, the atlas proxies are added now.
+    for (const InstanceRange& range : fInstanceRanges) {
+        flushState->sampledProxyArray()->push_back(range.fAtlasProxy);
+    }
+}
+
 void GrCCDrawPathsOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
     SkASSERT(fOwningPerOpsTaskPaths);
 
diff --git a/src/gpu/ccpr/GrCCDrawPathsOp.h b/src/gpu/ccpr/GrCCDrawPathsOp.h
index d0c4777..2a63953 100644
--- a/src/gpu/ccpr/GrCCDrawPathsOp.h
+++ b/src/gpu/ccpr/GrCCDrawPathsOp.h
@@ -44,7 +44,7 @@
         }
         fProcessors.visitProxies(fn);
     }
-    void onPrepare(GrOpFlushState*) override {}
+    void onPrepare(GrOpFlushState*) override;
 
     void addToOwningPerOpsTaskPaths(sk_sp<GrCCPerOpsTaskPaths> owningPerOpsTaskPaths);
 
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index ed722c8..bdcfa79 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -834,7 +834,8 @@
 
 bool GrGLGpu::onWritePixels(GrSurface* surface, int left, int top, int width, int height,
                             GrColorType surfaceColorType, GrColorType srcColorType,
-                            const GrMipLevel texels[], int mipLevelCount) {
+                            const GrMipLevel texels[], int mipLevelCount,
+                            bool prepForTexSampling) {
     auto glTex = static_cast<GrGLTexture*>(surface->asTexture());
 
     if (!check_write_and_transfer_input(glTex)) {
@@ -2247,7 +2248,8 @@
 GrOpsRenderPass* GrGLGpu::getOpsRenderPass(
         GrRenderTarget* rt, GrSurfaceOrigin origin, const SkRect& bounds,
         const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
-        const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo) {
+        const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo,
+        const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     if (!fCachedOpsRenderPass) {
         fCachedOpsRenderPass.reset(new GrGLOpsRenderPass(this));
     }
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index d494be3..4f28104 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -124,7 +124,8 @@
     GrOpsRenderPass* getOpsRenderPass(
             GrRenderTarget*, GrSurfaceOrigin, const SkRect&,
             const GrOpsRenderPass::LoadAndStoreInfo&,
-            const GrOpsRenderPass::StencilLoadAndStoreInfo&) override;
+            const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) override;
 
     void invalidateBoundRenderTarget() {
         fHWBoundRenderTargetUniqueID.makeInvalid();
@@ -244,7 +245,8 @@
 
     bool onWritePixels(GrSurface*, int left, int top, int width, int height,
                        GrColorType surfaceColorType, GrColorType srcColorType,
-                       const GrMipLevel texels[], int mipLevelCount) override;
+                       const GrMipLevel texels[], int mipLevelCount,
+                       bool prepForTexSampling) override;
 
     bool onTransferPixelsTo(GrTexture*, int left, int top, int width, int height,
                             GrColorType textureColorType, GrColorType bufferColorType,
diff --git a/src/gpu/mock/GrMockGpu.cpp b/src/gpu/mock/GrMockGpu.cpp
index 8101da8..817bb08 100644
--- a/src/gpu/mock/GrMockGpu.cpp
+++ b/src/gpu/mock/GrMockGpu.cpp
@@ -55,7 +55,8 @@
 GrOpsRenderPass* GrMockGpu::getOpsRenderPass(
                                 GrRenderTarget* rt, GrSurfaceOrigin origin, const SkRect& bounds,
                                 const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
-                                const GrOpsRenderPass::StencilLoadAndStoreInfo&) {
+                                const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+                                const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     return new GrMockOpsRenderPass(this, rt, origin, colorInfo);
 }
 
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index 86efe99..7334591 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -24,9 +24,11 @@
 
     ~GrMockGpu() override {}
 
-    GrOpsRenderPass* getOpsRenderPass(GrRenderTarget*, GrSurfaceOrigin, const SkRect&,
-                                      const GrOpsRenderPass::LoadAndStoreInfo&,
-                                      const GrOpsRenderPass::StencilLoadAndStoreInfo&) override;
+    GrOpsRenderPass* getOpsRenderPass(
+            GrRenderTarget*, GrSurfaceOrigin, const SkRect&,
+            const GrOpsRenderPass::LoadAndStoreInfo&,
+            const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) override;
 
     GrFence SK_WARN_UNUSED_RESULT insertFence() override { return 0; }
     bool waitFence(GrFence, uint64_t) override { return true; }
@@ -94,7 +96,8 @@
 
     bool onWritePixels(GrSurface* surface, int left, int top, int width, int height,
                        GrColorType surfaceColorType, GrColorType srcColorType,
-                       const GrMipLevel texels[], int mipLevelCount) override {
+                       const GrMipLevel texels[], int mipLevelCount,
+                       bool prepForTexSampling) override {
         return true;
     }
 
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 082f06c..a944349 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -81,9 +81,11 @@
     bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
                        const SkIPoint& dstPoint) override;
 
-    GrOpsRenderPass* getOpsRenderPass(GrRenderTarget*, GrSurfaceOrigin, const SkRect& bounds,
-                                      const GrOpsRenderPass::LoadAndStoreInfo&,
-                                      const GrOpsRenderPass::StencilLoadAndStoreInfo&) override;
+    GrOpsRenderPass* getOpsRenderPass(
+            GrRenderTarget*, GrSurfaceOrigin, const SkRect& bounds,
+            const GrOpsRenderPass::LoadAndStoreInfo&,
+            const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) override;
 
     SkSL::Compiler* shaderCompiler() const { return fCompiler.get(); }
 
@@ -168,7 +170,8 @@
 
     bool onWritePixels(GrSurface*, int left, int top, int width, int height,
                        GrColorType surfaceColorType, GrColorType bufferColorType,
-                       const GrMipLevel[], int mipLevelCount) override;
+                       const GrMipLevel[], int mipLevelCount,
+                       bool prepForTexSampling) override;
 
     bool onTransferPixelsTo(GrTexture*, int left, int top, int width, int height,
                             GrColorType textureColorType, GrColorType bufferColorType, GrGpuBuffer*,
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index 6bde931..37fd1a8 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -152,7 +152,8 @@
 GrOpsRenderPass* GrMtlGpu::getOpsRenderPass(
             GrRenderTarget* renderTarget, GrSurfaceOrigin origin, const SkRect& bounds,
             const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
-            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo) {
+            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     return new GrMtlOpsRenderPass(this, renderTarget, origin, colorInfo, stencilInfo);
 }
 
@@ -948,7 +949,8 @@
 
 bool GrMtlGpu::onWritePixels(GrSurface* surface, int left, int top, int width, int height,
                              GrColorType surfaceColorType, GrColorType srcColorType,
-                             const GrMipLevel texels[], int mipLevelCount) {
+                             const GrMipLevel texels[], int mipLevelCount,
+                             bool prepForTexSampling) {
     GrMtlTexture* mtlTexture = static_cast<GrMtlTexture*>(surface->asTexture());
     // TODO: In principle we should be able to support pure rendertargets as well, but
     // until we find a use case we'll only support texture rendertargets.
diff --git a/src/gpu/ops/GrAtlasTextOp.cpp b/src/gpu/ops/GrAtlasTextOp.cpp
index 1761acc..6e7d25d 100644
--- a/src/gpu/ops/GrAtlasTextOp.cpp
+++ b/src/gpu/ops/GrAtlasTextOp.cpp
@@ -305,6 +305,9 @@
     auto fixedDynamicState = target->makeFixedDynamicState(kMaxTextures);
     for (unsigned i = 0; i < numActiveProxies; ++i) {
         fixedDynamicState->fPrimitiveProcessorTextures[i] = proxies[i].get();
+        // This op does not know its atlas proxies when it is added to a GrOpsTask, so the proxies
+        // don't get added during the visitProxies call. Thus we add them here.
+        target->sampledProxyArray()->push_back(proxies[i].get());
     }
 
     FlushInfo flushInfo;
@@ -413,6 +416,9 @@
         // Update the proxies used in the GP to match.
         for (unsigned i = gp->numTextureSamplers(); i < numActiveProxies; ++i) {
             flushInfo->fFixedDynamicState->fPrimitiveProcessorTextures[i] = proxies[i].get();
+            // This op does not know its atlas proxies when it is added to a GrOpsTask, so the
+            // proxies don't get added during the visitProxies call. Thus we add them here.
+            target->sampledProxyArray()->push_back(proxies[i].get());
         }
         if (this->usesDistanceFields()) {
             if (this->isLCD()) {
diff --git a/src/gpu/ops/GrMeshDrawOp.h b/src/gpu/ops/GrMeshDrawOp.h
index 2140b7d..5baf985 100644
--- a/src/gpu/ops/GrMeshDrawOp.h
+++ b/src/gpu/ops/GrMeshDrawOp.h
@@ -160,6 +160,11 @@
     virtual GrStrikeCache* glyphCache() const = 0;
     virtual GrAtlasManager* atlasManager() const = 0;
 
+    // This should be called during onPrepare of a GrOp. The caller should add any proxies to the
+    // array it will use that it did not access during a call to visitProxies. This is usually the
+    // case for atlases.
+    virtual SkTArray<GrTextureProxy*, true>* sampledProxyArray() = 0;
+
     virtual const GrCaps& caps() const = 0;
 
     virtual GrDeferredUploadTarget* deferredUploadTarget() = 0;
diff --git a/src/gpu/ops/GrSmallPathRenderer.cpp b/src/gpu/ops/GrSmallPathRenderer.cpp
index f6a3705..bfabd47 100644
--- a/src/gpu/ops/GrSmallPathRenderer.cpp
+++ b/src/gpu/ops/GrSmallPathRenderer.cpp
@@ -326,7 +326,10 @@
         int numActiveProxies = fAtlas->numActivePages();
         const auto proxies = fAtlas->getProxies();
         for (int i = 0; i < numActiveProxies; ++i) {
+            // This op does not know its atlas proxies when it is added to a GrOpsTask, so the
+            // proxies don't get added during the visitProxies call. Thus we add them here.
             flushInfo.fFixedDynamicState->fPrimitiveProcessorTextures[i] = proxies[i].get();
+            target->sampledProxyArray()->push_back(proxies[i].get());
         }
 
         // Setup GrGeometryProcessor
@@ -777,6 +780,9 @@
         if (gp->numTextureSamplers() != numAtlasTextures) {
             for (int i = gp->numTextureSamplers(); i < numAtlasTextures; ++i) {
                 flushInfo->fFixedDynamicState->fPrimitiveProcessorTextures[i] = proxies[i].get();
+                // This op does not know its atlas proxies when it is added to a GrOpsTask, so the
+                // proxies don't get added during the visitProxies call. Thus we add them here.
+                target->sampledProxyArray()->push_back(proxies[i].get());
             }
             // During preparation the number of atlas pages has increased.
             // Update the proxies used in the GP to match.
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 4f6c785..bda54fa 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -321,12 +321,13 @@
 GrOpsRenderPass* GrVkGpu::getOpsRenderPass(
             GrRenderTarget* rt, GrSurfaceOrigin origin, const SkRect& bounds,
             const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
-            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo) {
+            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     if (!fCachedOpsRenderPass) {
         fCachedOpsRenderPass.reset(new GrVkOpsRenderPass(this));
     }
 
-    fCachedOpsRenderPass->set(rt, origin, colorInfo, stencilInfo);
+    fCachedOpsRenderPass->set(rt, origin, colorInfo, stencilInfo, sampledProxies);
     return fCachedOpsRenderPass.get();
 }
 
@@ -411,7 +412,8 @@
 
 bool GrVkGpu::onWritePixels(GrSurface* surface, int left, int top, int width, int height,
                             GrColorType surfaceColorType, GrColorType srcColorType,
-                            const GrMipLevel texels[], int mipLevelCount) {
+                            const GrMipLevel texels[], int mipLevelCount,
+                            bool prepForTexSampling) {
     GrVkTexture* vkTex = static_cast<GrVkTexture*>(surface->asTexture());
     if (!vkTex) {
         return false;
@@ -447,6 +449,12 @@
                                              mipLevelCount);
     }
 
+    if (prepForTexSampling) {
+        vkTex->setImageLayout(this, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                              VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                              false);
+    }
+
     return success;
 }
 
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index e6c558c..29e47d0 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -104,7 +104,8 @@
     GrOpsRenderPass* getOpsRenderPass(
             GrRenderTarget*, GrSurfaceOrigin, const SkRect&,
             const GrOpsRenderPass::LoadAndStoreInfo&,
-            const GrOpsRenderPass::StencilLoadAndStoreInfo&) override;
+            const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+            const SkTArray<GrTextureProxy*, true>& sampledProxies) override;
 
     void addBufferMemoryBarrier(const GrVkResource*,
                                 VkPipelineStageFlags srcStageMask,
@@ -224,7 +225,8 @@
 
     bool onWritePixels(GrSurface* surface, int left, int top, int width, int height,
                        GrColorType surfaceColorType, GrColorType srcColorType,
-                       const GrMipLevel texels[], int mipLevelCount) override;
+                       const GrMipLevel texels[], int mipLevelCount,
+                       bool prepForTexSampling) override;
 
     bool onTransferPixelsTo(GrTexture*, int left, int top, int width, int height,
                             GrColorType textureColorType, GrColorType bufferColorType,
diff --git a/src/gpu/vk/GrVkOpsRenderPass.cpp b/src/gpu/vk/GrVkOpsRenderPass.cpp
index 628cdb2..a42bef4 100644
--- a/src/gpu/vk/GrVkOpsRenderPass.cpp
+++ b/src/gpu/vk/GrVkOpsRenderPass.cpp
@@ -36,7 +36,7 @@
     InlineUpload(GrOpFlushState* state, const GrDeferredTextureUploadFn& upload)
             : fFlushState(state), fUpload(upload) {}
 
-    void execute(const Args& args) override { fFlushState->doUpload(fUpload); }
+    void execute(const Args& args) override { fFlushState->doUpload(fUpload, true); }
 
 private:
     GrOpFlushState* fFlushState;
@@ -183,13 +183,6 @@
 
         // We don't want to actually submit the secondary command buffer if it is wrapped.
         if (this->wrapsSecondaryCommandBuffer()) {
-            // If we have any sampled images set their layout now.
-            for (int j = 0; j < cbInfo.fSampledTextures.count(); ++j) {
-                cbInfo.fSampledTextures[j]->setImageLayout(
-                        fGpu, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT,
-                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, false);
-            }
-
             // There should have only been one secondary command buffer in the wrapped case so it is
             // safe to just return here.
             SkASSERT(fCommandBufferInfos.count() == 1);
@@ -234,13 +227,6 @@
                                           false);
             }
 
-            // If we have any sampled images set their layout now.
-            for (int j = 0; j < cbInfo.fSampledTextures.count(); ++j) {
-                cbInfo.fSampledTextures[j]->setImageLayout(
-                        fGpu, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT,
-                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, false);
-            }
-
             SkIRect iBounds;
             cbInfo.fBounds.roundOut(&iBounds);
 
@@ -253,7 +239,8 @@
 
 void GrVkOpsRenderPass::set(GrRenderTarget* rt, GrSurfaceOrigin origin,
                             const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
-                            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo) {
+                            const GrOpsRenderPass::StencilLoadAndStoreInfo& stencilInfo,
+                            const SkTArray<GrTextureProxy*, true>& sampledProxies) {
     SkASSERT(!fRenderTarget);
     SkASSERT(fCommandBufferInfos.empty());
     SkASSERT(-1 == fCurrentCmdInfo);
@@ -266,6 +253,16 @@
 
     this->INHERITED::set(rt, origin);
 
+    for (int i = 0; i < sampledProxies.count(); ++i) {
+        if (sampledProxies[i]->isInstantiated()) {
+            GrVkTexture* vkTex = static_cast<GrVkTexture*>(sampledProxies[i]->peekTexture());
+            SkASSERT(vkTex);
+            vkTex->setImageLayout(
+                    fGpu, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT,
+                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, false);
+        }
+    }
+
     if (this->wrapsSecondaryCommandBuffer()) {
         this->initWrapped();
         return;
@@ -577,6 +574,15 @@
     return pipelineState;
 }
 
+#ifdef SK_DEBUG
+// Debug check: a sampled texture must obey protected-content rules and already be shader-readable.
+static void check_sampled_texture(GrTexture* tex, GrRenderTarget* rt, GrVkGpu* gpu) {
+    SkASSERT(!tex->isProtected() || (rt->isProtected() && gpu->protectedContext()));
+    GrVkTexture* vkTex = static_cast<GrVkTexture*>(tex);
+    SkASSERT(vkTex->currentLayout() == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+}
+#endif
+
 void GrVkOpsRenderPass::onDraw(const GrPrimitiveProcessor& primProc,
                                     const GrPipeline& pipeline,
                                     const GrPipeline::FixedDynamicState* fixedDynamicState,
@@ -590,29 +596,31 @@
 
     CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
 
+#ifdef SK_DEBUG
     if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
         for (int m = 0, i = 0; m < meshCount; ++m) {
             for (int s = 0; s < primProc.numTextureSamplers(); ++s, ++i) {
                 auto texture = dynamicStateArrays->fPrimitiveProcessorTextures[i]->peekTexture();
-                this->appendSampledTexture(texture);
+                check_sampled_texture(texture, fRenderTarget, fGpu);
             }
         }
     } else {
         for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
             auto texture = fixedDynamicState->fPrimitiveProcessorTextures[i]->peekTexture();
-            this->appendSampledTexture(texture);
+            check_sampled_texture(texture, fRenderTarget, fGpu);
         }
     }
     GrFragmentProcessor::Iter iter(pipeline);
     while (const GrFragmentProcessor* fp = iter.next()) {
         for (int i = 0; i < fp->numTextureSamplers(); ++i) {
             const GrFragmentProcessor::TextureSampler& sampler = fp->textureSampler(i);
-            this->appendSampledTexture(sampler.peekTexture());
+            check_sampled_texture(sampler.peekTexture(), fRenderTarget, fGpu);
         }
     }
     if (GrTexture* dstTexture = pipeline.peekDstTexture()) {
-        this->appendSampledTexture(dstTexture);
+        check_sampled_texture(dstTexture, fRenderTarget, fGpu);
     }
+#endif
 
     GrPrimitiveType primitiveType = meshes[0].primitiveType();
     GrVkPipelineState* pipelineState = this->prepareDrawState(primProc, pipeline, fixedDynamicState,
@@ -656,13 +664,6 @@
     cbInfo.fIsEmpty = false;
 }
 
-void GrVkOpsRenderPass::appendSampledTexture(GrTexture* tex) {
-    SkASSERT(!tex->isProtected() || (fRenderTarget->isProtected() && fGpu->protectedContext()));
-    GrVkTexture* vkTex = static_cast<GrVkTexture*>(tex);
-
-    fCommandBufferInfos[fCurrentCmdInfo].fSampledTextures.push_back(sk_ref_sp(vkTex));
-}
-
 void GrVkOpsRenderPass::sendInstancedMeshToGpu(GrPrimitiveType,
                                                const GrBuffer* vertexBuffer,
                                                int vertexCount,
diff --git a/src/gpu/vk/GrVkOpsRenderPass.h b/src/gpu/vk/GrVkOpsRenderPass.h
index 0e68f80..e60c3e8 100644
--- a/src/gpu/vk/GrVkOpsRenderPass.h
+++ b/src/gpu/vk/GrVkOpsRenderPass.h
@@ -58,7 +58,8 @@
 
     void set(GrRenderTarget*, GrSurfaceOrigin,
              const GrOpsRenderPass::LoadAndStoreInfo&,
-             const GrOpsRenderPass::StencilLoadAndStoreInfo&);
+             const GrOpsRenderPass::StencilLoadAndStoreInfo&,
+             const SkTArray<GrTextureProxy*, true>& sampledProxies);
     void reset();
 
     void submit();
@@ -97,8 +98,6 @@
                 int meshCount,
                 const SkRect& bounds) override;
 
-    void appendSampledTexture(GrTexture*);
-
     // GrMesh::SendToGpuImpl methods. These issue the actual Vulkan draw commands.
     // Marked final as a hint to the compiler to not use virtual dispatch.
     void sendMeshToGpu(GrPrimitiveType primType, const GrBuffer* vertexBuffer, int vertexCount,
@@ -147,10 +146,6 @@
         SkRect fBounds;
         bool fIsEmpty = true;
         LoadStoreState fLoadStoreState = LoadStoreState::kUnknown;
-        // Array of images that will be sampled and thus need to be transferred to sampled layout
-        // before submitting the secondary command buffers. This must happen after we do any predraw
-        // uploads or copies.
-        SkTArray<sk_sp<GrVkTexture>> fSampledTextures;
 
         GrVkSecondaryCommandBuffer* currentCmdBuf() {
             return fCommandBuffer.get();