Revert "Revert "Reland "Add ability to specify different GP textures for each mesh in a draw."""

This reverts commit cd7907b57d7624c9bb60cd914d175ed72d9b1365.

Re-adds the optimization to draw single texture quads non-indexed, using GrPrimitiveType::kTriangleStrip.

Removes disable on NVIDIA/VK as bounds issue was fixed here:
https://skia.googlesource.com/skia/+/c525d4f7101715d728fca1b7fd7f170115994646

Change-Id: Icbea3de0382c68318db8ecffb2244bc4c5fe84ad
Reviewed-on: https://skia-review.googlesource.com/155500
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp
index c2b1452..2027d69 100644
--- a/src/gpu/GrCaps.cpp
+++ b/src/gpu/GrCaps.cpp
@@ -63,6 +63,7 @@
     fFenceSyncSupport = false;
     fCrossContextTextureSupport = false;
     fHalfFloatVertexAttributeSupport = false;
+    fDynamicStateArrayGeometryProcessorTextureSupport = false;
 
     fBlendEquationSupport = kBasic_BlendEquationSupport;
     fAdvBlendEqBlacklist = 0;
@@ -176,6 +177,8 @@
     writer->appendBool("Fence sync support", fFenceSyncSupport);
     writer->appendBool("Cross context texture support", fCrossContextTextureSupport);
     writer->appendBool("Half float vertex attribute support", fHalfFloatVertexAttributeSupport);
+    writer->appendBool("Specify GeometryProcessor textures as a dynamic state array",
+                       fDynamicStateArrayGeometryProcessorTextureSupport);
 
     writer->appendBool("Blacklist Coverage Counting Path Renderer [workaround]",
                        fBlacklistCoverageCounting);
diff --git a/src/gpu/GrCaps.h b/src/gpu/GrCaps.h
index 99992bf..9279fd4 100644
--- a/src/gpu/GrCaps.h
+++ b/src/gpu/GrCaps.h
@@ -241,13 +241,16 @@
 
     bool fenceSyncSupport() const { return fFenceSyncSupport; }
     bool crossContextTextureSupport() const { return fCrossContextTextureSupport; }
-
     /**
      * Returns whether or not we will be able to do a copy given the passed in params
      */
     virtual bool canCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
                                 const SkIRect& srcRect, const SkIPoint& dstPoint) const = 0;
 
+    bool dynamicStateArrayGeometryProcessorTextureSupport() const {
+        return fDynamicStateArrayGeometryProcessorTextureSupport;
+    }
+
     /**
      * This is can be called before allocating a texture to be a dst for copySurface. This is only
      * used for doing dst copies needed in blends, thus the src is always a GrRenderTargetProxy. It
@@ -336,9 +339,12 @@
     // TODO: this may need to be an enum to support different fence types
     bool fFenceSyncSupport                           : 1;
 
-    // Vulkan doesn't support this (yet) and some drivers have issues, too
+    // Requires fence sync support in GL.
     bool fCrossContextTextureSupport                 : 1;
 
+    // Whether GP textures can be specified per-mesh via GrPipeline::DynamicStateArrays.
+    bool fDynamicStateArrayGeometryProcessorTextureSupport : 1;
+
     BlendEquationSupport fBlendEquationSupport;
     uint32_t fAdvBlendEqBlacklist;
     GR_STATIC_ASSERT(kLast_GrBlendEquation < 32);
diff --git a/src/gpu/GrGpuCommandBuffer.cpp b/src/gpu/GrGpuCommandBuffer.cpp
index d66f24c..421d057 100644
--- a/src/gpu/GrGpuCommandBuffer.cpp
+++ b/src/gpu/GrGpuCommandBuffer.cpp
@@ -47,9 +47,20 @@
     if (pipeline.isBad()) {
         return false;
     }
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        if (!fixedDynamicState->fPrimitiveProcessorTextures[i]->instantiate(resourceProvider)) {
-            return false;
+    if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+            if (!fixedDynamicState->fPrimitiveProcessorTextures[i]->instantiate(resourceProvider)) {
+                return false;
+            }
+        }
+    }
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = primProc.numTextureSamplers() * meshCount;
+        const auto* textures = dynamicStateArrays->fPrimitiveProcessorTextures;
+        for (int i = 0; i < n; ++i) {
+            if (!textures[i]->instantiate(resourceProvider)) {
+                return false;
+            }
         }
     }
 
diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp
index 6385c9f..a1d9229 100644
--- a/src/gpu/GrOpFlushState.cpp
+++ b/src/gpu/GrOpFlushState.cpp
@@ -102,19 +102,28 @@
 
 void GrOpFlushState::draw(sk_sp<const GrGeometryProcessor> gp, const GrPipeline* pipeline,
                           const GrPipeline::FixedDynamicState* fixedDynamicState,
+                          const GrPipeline::DynamicStateArrays* dynamicStateArrays,
                           const GrMesh meshes[], int meshCnt) {
     SkASSERT(fOpArgs);
     SkASSERT(fOpArgs->fOp);
     bool firstDraw = fDraws.begin() == fDraws.end();
     auto& draw = fDraws.append(&fArena);
     GrDeferredUploadToken token = fTokenTracker->issueDrawToken();
-    for (int i = 0; i < gp->numTextureSamplers(); ++i) {
-        fixedDynamicState->fPrimitiveProcessorTextures[i]->addPendingRead();
+    if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < gp->numTextureSamplers(); ++i) {
+            fixedDynamicState->fPrimitiveProcessorTextures[i]->addPendingRead();
+        }
+    }
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = gp->numTextureSamplers() * meshCnt;
+        for (int i = 0; i < n; ++i) {
+            dynamicStateArrays->fPrimitiveProcessorTextures[i]->addPendingRead();
+        }
     }
     draw.fGeometryProcessor = std::move(gp);
     draw.fPipeline = pipeline;
     draw.fFixedDynamicState = fixedDynamicState;
-    draw.fDynamicStateArrays = nullptr;
+    draw.fDynamicStateArrays = dynamicStateArrays;
     draw.fMeshes = meshes;
     draw.fMeshCnt = meshCnt;
     draw.fOpID = fOpArgs->fOp->uniqueID();
@@ -165,3 +174,20 @@
 GrAtlasManager* GrOpFlushState::atlasManager() const {
     return fGpu->getContext()->contextPriv().getAtlasManager();
 }
+
+//////////////////////////////////////////////////////////////////////////////
+
+GrOpFlushState::Draw::~Draw() {
+    if (fFixedDynamicState && fFixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < fGeometryProcessor->numTextureSamplers(); ++i) {
+            fFixedDynamicState->fPrimitiveProcessorTextures[i]->completedRead();
+        }
+    }
+    if (fDynamicStateArrays && fDynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = fGeometryProcessor->numTextureSamplers() * fMeshCnt;
+        const auto* textures = fDynamicStateArrays->fPrimitiveProcessorTextures;
+        for (int i = 0; i < n; ++i) {
+            textures[i]->completedRead();
+        }
+    }
+}
diff --git a/src/gpu/GrOpFlushState.h b/src/gpu/GrOpFlushState.h
index 0a46ed2..4746aa2 100644
--- a/src/gpu/GrOpFlushState.h
+++ b/src/gpu/GrOpFlushState.h
@@ -76,8 +76,9 @@
     void draw(sk_sp<const GrGeometryProcessor>,
               const GrPipeline*,
               const GrPipeline::FixedDynamicState*,
+              const GrPipeline::DynamicStateArrays*,
               const GrMesh[],
-              int meshCount) final;
+              int meshCnt) final;
     void* makeVertexSpace(size_t vertexSize, int vertexCount, const GrBuffer**,
                           int* startVertex) final;
     uint16_t* makeIndexSpace(int indexCount, const GrBuffer**, int* startIndex) final;
@@ -120,11 +121,7 @@
     // that share a geometry processor into a Draw is that it allows the Gpu object to setup
     // the shared state once and then issue draws for each mesh.
     struct Draw {
-        ~Draw() {
-            for (int i = 0; i < fGeometryProcessor->numTextureSamplers(); ++i) {
-                fFixedDynamicState->fPrimitiveProcessorTextures[i]->completedRead();
-            }
-        }
+        ~Draw();
         sk_sp<const GrGeometryProcessor> fGeometryProcessor;
         const GrPipeline* fPipeline = nullptr;
         const GrPipeline::FixedDynamicState* fFixedDynamicState;
diff --git a/src/gpu/GrPipeline.h b/src/gpu/GrPipeline.h
index 5817ce8..5e75c65 100644
--- a/src/gpu/GrPipeline.h
+++ b/src/gpu/GrPipeline.h
@@ -73,7 +73,8 @@
         explicit FixedDynamicState(const SkIRect& scissorRect) : fScissorRect(scissorRect) {}
         FixedDynamicState() = default;
         SkIRect fScissorRect = SkIRect::EmptyIRect();
-        // Must have GrPrimitiveProcessor::numTextureSamplers() entries. Can be null if no samplers.
+        // Must have GrPrimitiveProcessor::numTextureSamplers() entries. Can be null if no samplers
+        // or if textures are passed using DynamicStateArrays.
         GrTextureProxy** fPrimitiveProcessorTextures = nullptr;
     };
 
@@ -83,6 +84,10 @@
      */
     struct DynamicStateArrays {
         const SkIRect* fScissorRects = nullptr;
+        // Must have GrPrimitiveProcessor::numTextureSamplers() * num_meshes entries.
+        // Can be null if no samplers or to use the same textures for all meshes via
+        // FixedDynamicState.
+        GrTextureProxy** fPrimitiveProcessorTextures = nullptr;
     };
 
     /**
diff --git a/src/gpu/GrRenderTargetOpList.h b/src/gpu/GrRenderTargetOpList.h
index c15c054..e74a25c 100644
--- a/src/gpu/GrRenderTargetOpList.h
+++ b/src/gpu/GrRenderTargetOpList.h
@@ -179,7 +179,7 @@
     int                            fLastClipNumAnalyticFPs;
 
     // For ops/opList we have mean: 5 stdDev: 28
-    SkSTArray<5, RecordedOp, true> fRecordedOps;
+    SkSTArray<25, RecordedOp, true> fRecordedOps;
 
     // MDB TODO: 4096 for the first allocation of the clip space will be huge overkill.
     // Gather statistics to determine the correct size.
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index f8546c9..2fa332f 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -586,6 +586,8 @@
         fHalfFloatVertexAttributeSupport = true;
     }
 
+    fDynamicStateArrayGeometryProcessorTextureSupport = true;
+
     if (kGL_GrGLStandard == standard) {
         if (version >= GR_GL_VER(4, 1)) {
             fProgramBinarySupport = true;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index fda222a..b5ee46e 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -1672,9 +1672,11 @@
     }
 }
 
-void GrGLGpu::generateMipmapsForProcessorTextures(const GrPrimitiveProcessor& primProc,
-                                                  const GrPipeline& pipeline,
-                                                  const GrTextureProxy* const primProcTextures[]) {
+void GrGLGpu::resolveAndGenerateMipMapsForProcessorTextures(
+        const GrPrimitiveProcessor& primProc,
+        const GrPipeline& pipeline,
+        const GrTextureProxy* const primProcTextures[],
+        int numPrimitiveProcessorTextureSets) {
     auto genLevelsIfNeeded = [this](GrTexture* tex, const GrSamplerState& sampler) {
         SkASSERT(tex);
         if (sampler.filter() == GrSamplerState::Filter::kMipMap &&
@@ -1682,12 +1684,19 @@
             tex->texturePriv().mipMapsAreDirty()) {
             SkASSERT(this->caps()->mipMapSupport());
             this->regenerateMipMapLevels(static_cast<GrGLTexture*>(tex));
+            SkASSERT(!tex->asRenderTarget() || !tex->asRenderTarget()->needsResolve());
+        } else if (auto* rt = tex->asRenderTarget()) {
+            if (rt->needsResolve()) {
+                this->resolveRenderTarget(rt);
+            }
         }
     };
 
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        GrTexture* tex = primProcTextures[i]->peekTexture();
-        genLevelsIfNeeded(tex, primProc.textureSampler(i).samplerState());
+    for (int set = 0, tex = 0; set < numPrimitiveProcessorTextureSets; ++set) {
+        for (int sampler = 0; sampler < primProc.numTextureSamplers(); ++sampler, ++tex) {
+            GrTexture* texture = primProcTextures[tex]->peekTexture();
+            genLevelsIfNeeded(texture, primProc.textureSampler(sampler).samplerState());
+        }
     }
 
     GrFragmentProcessor::Iter iter(pipeline);
@@ -1702,17 +1711,26 @@
 bool GrGLGpu::flushGLState(const GrPrimitiveProcessor& primProc,
                            const GrPipeline& pipeline,
                            const GrPipeline::FixedDynamicState* fixedDynamicState,
+                           const GrPipeline::DynamicStateArrays* dynamicStateArrays,
+                           int dynamicStateArraysLength,
                            bool willDrawPoints) {
     sk_sp<GrGLProgram> program(fProgramCache->refProgram(this, primProc, pipeline, willDrawPoints));
     if (!program) {
         GrCapsDebugf(this->caps(), "Failed to create program!\n");
         return false;
     }
-    const GrTextureProxy* const* primProcProxies = nullptr;
-    if (fixedDynamicState) {
-        primProcProxies = fixedDynamicState->fPrimitiveProcessorTextures;
+    const GrTextureProxy* const* primProcProxiesForMipRegen = nullptr;
+    const GrTextureProxy* const* primProcProxiesToBind = nullptr;
+    int numPrimProcTextureSets = 1;  // number of textures per prim proc sampler.
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        primProcProxiesForMipRegen = dynamicStateArrays->fPrimitiveProcessorTextures;
+        numPrimProcTextureSets = dynamicStateArraysLength;
+    } else if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        primProcProxiesForMipRegen = fixedDynamicState->fPrimitiveProcessorTextures;
+        primProcProxiesToBind = fixedDynamicState->fPrimitiveProcessorTextures;
     }
-    this->generateMipmapsForProcessorTextures(primProc, pipeline, primProcProxies);
+    this->resolveAndGenerateMipMapsForProcessorTextures(
+            primProc, pipeline, primProcProxiesForMipRegen, numPrimProcTextureSets);
 
     GrXferProcessor::BlendInfo blendInfo;
     pipeline.getXferProcessor().getBlendInfo(&blendInfo);
@@ -1729,7 +1747,7 @@
         this->flushBlend(blendInfo, swizzle);
     }
 
-    fHWProgram->updateUniformsAndTextureBindings(primProc, pipeline, primProcProxies);
+    fHWProgram->updateUniformsAndTextureBindings(primProc, pipeline, primProcProxiesToBind);
 
     GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(pipeline.renderTarget());
     GrStencilSettings stencil;
@@ -2271,30 +2289,40 @@
             break;
         }
     }
-    if (!this->flushGLState(primProc, pipeline, fixedDynamicState, hasPoints)) {
+    if (!this->flushGLState(primProc, pipeline, fixedDynamicState, dynamicStateArrays, meshCount,
+                            hasPoints)) {
         return;
     }
 
-    bool dynamicScissor =
-            pipeline.isScissorEnabled() && dynamicStateArrays && dynamicStateArrays->fScissorRects;
-    for (int i = 0; i < meshCount; ++i) {
+    bool dynamicScissor = false;
+    bool dynamicPrimProcTextures = false;
+    if (dynamicStateArrays) {
+        dynamicScissor = pipeline.isScissorEnabled() && dynamicStateArrays->fScissorRects;
+        dynamicPrimProcTextures = dynamicStateArrays->fPrimitiveProcessorTextures;
+    }
+    for (int m = 0; m < meshCount; ++m) {
         if (GrXferBarrierType barrierType = pipeline.xferBarrierType(*this->caps())) {
             this->xferBarrier(pipeline.renderTarget(), barrierType);
         }
 
         if (dynamicScissor) {
             GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(pipeline.renderTarget());
-            this->flushScissor(GrScissorState(dynamicStateArrays->fScissorRects[i]),
+            this->flushScissor(GrScissorState(dynamicStateArrays->fScissorRects[m]),
                                glRT->getViewport(), pipeline.proxy()->origin());
         }
+        if (dynamicPrimProcTextures) {
+            auto texProxyArray = dynamicStateArrays->fPrimitiveProcessorTextures +
+                                 m * primProc.numTextureSamplers();
+            fHWProgram->updatePrimitiveProcessorTextureBindings(primProc, texProxyArray);
+        }
         if (this->glCaps().requiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines() &&
-            GrIsPrimTypeLines(meshes[i].primitiveType()) &&
+            GrIsPrimTypeLines(meshes[m].primitiveType()) &&
             !GrIsPrimTypeLines(fLastPrimitiveType)) {
             GL_CALL(Enable(GR_GL_CULL_FACE));
             GL_CALL(Disable(GR_GL_CULL_FACE));
         }
-        meshes[i].sendToGpu(this);
-        fLastPrimitiveType = meshes[i].primitiveType();
+        meshes[m].sendToGpu(this);
+        fLastPrimitiveType = meshes[m].primitiveType();
     }
 
 #if SWAP_PER_DRAW
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 6b01d65..295ca42 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -251,14 +251,22 @@
 
     void setTextureSwizzle(int unitIdx, GrGLenum target, const GrGLenum swizzle[]);
 
-    void generateMipmapsForProcessorTextures(
+    /**
+     * primitiveProcessorTextures must contain GrPrimitiveProcessor::numTextureSamplers() *
+     * numPrimitiveProcessorTextureSets entries.
+     */
+    void resolveAndGenerateMipMapsForProcessorTextures(
             const GrPrimitiveProcessor&, const GrPipeline&,
-            const GrTextureProxy* const primitiveProcessorTextures[]);
+            const GrTextureProxy* const primitiveProcessorTextures[],
+            int numPrimitiveProcessorTextureSets);
 
     // Flushes state from GrPipeline to GL. Returns false if the state couldn't be set.
     // willDrawPoints must be true if point primitives will be rendered after setting the GL state.
+    // If DynamicStateArrays is not null then dynamicStateArraysLength is the number of dynamic
+    // state entries in each array.
     bool flushGLState(const GrPrimitiveProcessor&, const GrPipeline&,
-                      const GrPipeline::FixedDynamicState*, bool willDrawPoints);
+                      const GrPipeline::FixedDynamicState*, const GrPipeline::DynamicStateArrays*,
+                      int dynamicStateArraysLength, bool willDrawPoints);
 
     void flushProgram(sk_sp<GrGLProgram>);
 
diff --git a/src/gpu/gl/GrGLPathRendering.cpp b/src/gpu/gl/GrGLPathRendering.cpp
index 8bd4c9c..1b3cfbd 100644
--- a/src/gpu/gl/GrGLPathRendering.cpp
+++ b/src/gpu/gl/GrGLPathRendering.cpp
@@ -116,7 +116,7 @@
                                    const GrPipeline::FixedDynamicState& fixedDynamicState,
                                    const GrStencilSettings& stencilPassSettings,
                                    const GrPath* path) {
-    if (!this->gpu()->flushGLState(primProc, pipeline, &fixedDynamicState, false)) {
+    if (!this->gpu()->flushGLState(primProc, pipeline, &fixedDynamicState, nullptr, 1, false)) {
         return;
     }
     const GrGLPath* glPath = static_cast<const GrGLPath*>(path);
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 06aaade..8a5308f 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -75,7 +75,6 @@
 void GrGLProgram::updateUniformsAndTextureBindings(const GrPrimitiveProcessor& primProc,
                                                    const GrPipeline& pipeline,
                                                    const GrTextureProxy* const primProcTextures[]) {
-    SkASSERT(primProcTextures || !primProc.numTextureSamplers());
     this->setRenderTargetState(primProc, pipeline.proxy());
 
     // we set the textures, and uniforms for installed processors in a generic way, but subclasses
@@ -84,13 +83,12 @@
     // We must bind to texture units in the same order in which we set the uniforms in
     // GrGLProgramDataManager. That is, we bind textures for processors in this order:
     // primProc, fragProcs, XP.
-    int nextTexSamplerIdx = 0;
     fPrimitiveProcessor->setData(fProgramDataManager, primProc,
                                  GrFragmentProcessor::CoordTransformIter(pipeline));
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        auto* tex = static_cast<GrGLTexture*>(primProcTextures[i]->peekTexture());
-        fGpu->bindTexture(nextTexSamplerIdx++, primProc.textureSampler(i).samplerState(), tex);
+    if (primProcTextures) {
+        this->updatePrimitiveProcessorTextureBindings(primProc, primProcTextures);
     }
+    int nextTexSamplerIdx = primProc.numTextureSamplers();
 
     this->setFragmentData(pipeline, &nextTexSamplerIdx);
 
@@ -106,6 +104,14 @@
     SkASSERT(nextTexSamplerIdx == fNumTextureSamplers);
 }
 
+void GrGLProgram::updatePrimitiveProcessorTextureBindings(const GrPrimitiveProcessor& primProc,
+                                                          const GrTextureProxy* const proxies[]) {
+    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+        auto* tex = static_cast<GrGLTexture*>(proxies[i]->peekTexture());
+        fGpu->bindTexture(i, primProc.textureSampler(i).samplerState(), tex);
+    }
+}
+
 void GrGLProgram::setFragmentData(const GrPipeline& pipeline, int* nextTexSamplerIdx) {
     GrFragmentProcessor::Iter iter(pipeline);
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index ca9253c..1a2ef20 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -112,13 +112,17 @@
 
     /**
      * This function uploads uniforms, calls each GrGLSL*Processor's setData. It binds all fragment
-     * processor textures. Primitive process textures are also bound here but are passed separately.
+     * processor textures. Primitive processor textures can be bound using this function or by
+     * calling updatePrimitiveProcessorTextureBindings.
      *
      * It is the caller's responsibility to ensure the program is bound before calling.
      */
     void updateUniformsAndTextureBindings(const GrPrimitiveProcessor&, const GrPipeline&,
                                           const GrTextureProxy* const primitiveProcessorTextures[]);
 
+    void updatePrimitiveProcessorTextureBindings(const GrPrimitiveProcessor&,
+                                                 const GrTextureProxy* const[]);
+
     int vertexStride() const { return fVertexStride; }
     int instanceStride() const { return fInstanceStride; }
 
diff --git a/src/gpu/ops/GrAAConvexPathRenderer.cpp b/src/gpu/ops/GrAAConvexPathRenderer.cpp
index 1cbf5cf..cef10cc 100644
--- a/src/gpu/ops/GrAAConvexPathRenderer.cpp
+++ b/src/gpu/ops/GrAAConvexPathRenderer.cpp
@@ -941,7 +941,7 @@
                 firstIndex += draw.fIndexCnt;
                 firstVertex += draw.fVertexCnt;
             }
-            target->draw(quadProcessor, pipe.fPipeline, pipe.fFixedDynamicState, meshes,
+            target->draw(quadProcessor, pipe.fPipeline, pipe.fFixedDynamicState, nullptr, meshes,
                          draws.count());
         }
     }
diff --git a/src/gpu/ops/GrMeshDrawOp.cpp b/src/gpu/ops/GrMeshDrawOp.cpp
index 620ea47..54d2165 100644
--- a/src/gpu/ops/GrMeshDrawOp.cpp
+++ b/src/gpu/ops/GrMeshDrawOp.cpp
@@ -73,6 +73,29 @@
 
 //////////////////////////////////////////////////////////////////////////////
 
+GrPipeline::FixedDynamicState* GrMeshDrawOp::Target::allocFixedDynamicState(
+        const SkIRect& rect, int numPrimitiveProcessorTextures) {
+    auto result = this->pipelineArena()->make<GrPipeline::FixedDynamicState>(rect);
+    if (numPrimitiveProcessorTextures) {
+        result->fPrimitiveProcessorTextures =
+                this->allocPrimitiveProcessorTextureArray(numPrimitiveProcessorTextures);
+    }
+    return result;
+}
+
+GrPipeline::DynamicStateArrays* GrMeshDrawOp::Target::allocDynamicStateArrays(
+        int numMeshes, int numPrimitiveProcessorTextures, bool allocScissors) {
+    auto result = this->pipelineArena()->make<GrPipeline::DynamicStateArrays>();
+    if (allocScissors) {
+        result->fScissorRects = this->pipelineArena()->makeArray<SkIRect>(numMeshes);
+    }
+    if (numPrimitiveProcessorTextures) {
+        result->fPrimitiveProcessorTextures = this->allocPrimitiveProcessorTextureArray(
+                numPrimitiveProcessorTextures * numMeshes);
+    }
+    return result;
+}
+
 GrMeshDrawOp::Target::PipelineAndFixedDynamicState GrMeshDrawOp::Target::makePipeline(
         uint32_t pipelineFlags, GrProcessorSet&& processorSet, GrAppliedClip&& clip,
         int numPrimProcTextures) {
diff --git a/src/gpu/ops/GrMeshDrawOp.h b/src/gpu/ops/GrMeshDrawOp.h
index a6e7da5..808a1d4 100644
--- a/src/gpu/ops/GrMeshDrawOp.h
+++ b/src/gpu/ops/GrMeshDrawOp.h
@@ -83,15 +83,15 @@
     virtual void draw(sk_sp<const GrGeometryProcessor>,
                       const GrPipeline*,
                       const GrPipeline::FixedDynamicState*,
+                      const GrPipeline::DynamicStateArrays*,
                       const GrMesh[],
                       int meshCount) = 0;
-
     /** Helper for drawing a single GrMesh. */
     void draw(sk_sp<const GrGeometryProcessor> gp,
               const GrPipeline* pipeline,
               const GrPipeline::FixedDynamicState* fixedDynamicState,
               const GrMesh* mesh) {
-        this->draw(std::move(gp), pipeline, fixedDynamicState, mesh, 1);
+        this->draw(std::move(gp), pipeline, fixedDynamicState, nullptr, mesh, 1);
     }
 
     /**
@@ -152,14 +152,11 @@
     GrMesh* allocMeshes(int n) { return this->pipelineArena()->makeArray<GrMesh>(n); }
 
     GrPipeline::FixedDynamicState* allocFixedDynamicState(const SkIRect& rect,
-                                                          int numPrimitiveProcessorTextures = 0) {
-        auto result = this->pipelineArena()->make<GrPipeline::FixedDynamicState>(rect);
-        if (numPrimitiveProcessorTextures) {
-            result->fPrimitiveProcessorTextures =
-                    this->allocPrimitiveProcessorTextureArray(numPrimitiveProcessorTextures);
-        }
-        return result;
-    }
+                                                          int numPrimitiveProcessorTextures = 0);
+
+    GrPipeline::DynamicStateArrays* allocDynamicStateArrays(int numMeshes,
+                                                            int numPrimitiveProcessorTextures,
+                                                            bool allocScissors);
 
     GrTextureProxy** allocPrimitiveProcessorTextureArray(int n) {
         SkASSERT(n > 0);
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index ba37048..620b8f5 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -611,7 +611,7 @@
     }
 
     template <typename Pos, Domain D, GrAA AA>
-    void tess(void* v, const GrGeometryProcessor* gp) {
+    void tess(void* v, const GrGeometryProcessor* gp) const {
         using Vertex = TextureGeometryProcessor::Vertex<Pos, D, AA>;
         SkASSERT(gp->debugOnly_vertexStride() == sizeof(Vertex));
         auto vertices = static_cast<Vertex*>(v);
@@ -628,16 +628,25 @@
     }
 
     void onPrepareDraws(Target* target) override {
-        if (!fProxy->instantiate(target->resourceProvider())) {
-            return;
+        bool hasPerspective = false;
+        Domain domain = Domain::kNo;
+        int numOps = 0;
+        for (const auto& op : ChainRange<TextureOp>(this)) {
+            ++numOps;
+            hasPerspective |= op.fPerspective;
+            if (op.fDomain) {
+                domain = Domain::kYes;
+            }
+            if (!op.fProxy->instantiate(target->resourceProvider())) {
+                return;
+            }
         }
 
-        Domain domain = fDomain ? Domain::kYes : Domain::kNo;
         bool coverageAA = GrAAType::kCoverage == this->aaType();
         sk_sp<GrGeometryProcessor> gp = TextureGeometryProcessor::Make(
                 fProxy->textureType(), fProxy->config(), fFilter,
                 std::move(fTextureColorSpaceXform), std::move(fPaintColorSpaceXform), coverageAA,
-                fPerspective, domain, *target->caps().shaderCaps());
+                hasPerspective, domain, *target->caps().shaderCaps());
         GrPipeline::InitArgs args;
         args.fProxy = target->proxy();
         args.fCaps = &target->caps();
@@ -648,8 +657,17 @@
         }
 
         auto clip = target->detachAppliedClip();
-        auto* fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 1);
-        fixedDynamicState->fPrimitiveProcessorTextures[0] = fProxy;
+        // We'll use a dynamic state array for the GP textures when there are multiple ops.
+        // Otherwise, we use fixed dynamic state to specify the single op's proxy.
+        GrPipeline::DynamicStateArrays* dynamicStateArrays = nullptr;
+        GrPipeline::FixedDynamicState* fixedDynamicState;
+        if (numOps > 1) {
+            dynamicStateArrays = target->allocDynamicStateArrays(numOps, 1, false);
+            fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 0);
+        } else {
+            fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 1);
+            fixedDynamicState->fPrimitiveProcessorTextures[0] = fProxy;
+        }
         const auto* pipeline =
                 target->allocPipeline(args, GrProcessorSet::MakeEmptySet(), std::move(clip));
         using TessFn = decltype(&TextureOp::tess<SkPoint, Domain::kNo, GrAA::kNo>);
@@ -673,42 +691,50 @@
         };
 #undef TESS_FN_AND_VERTEX_SIZE
         int tessFnIdx = 0;
-        tessFnIdx |= coverageAA   ? 0x1 : 0x0;
-        tessFnIdx |= fDomain      ? 0x2 : 0x0;
-        tessFnIdx |= fPerspective ? 0x4 : 0x0;
+        tessFnIdx |= coverageAA               ? 0x1 : 0x0;
+        tessFnIdx |= (domain == Domain::kYes) ? 0x2 : 0x0;
+        tessFnIdx |= hasPerspective           ? 0x4 : 0x0;
 
         SkASSERT(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize == gp->debugOnly_vertexStride());
 
-        int vstart;
-        const GrBuffer* vbuffer;
-        void* vdata = target->makeVertexSpace(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize,
-                                              4 * fDraws.count(), &vbuffer, &vstart);
-        if (!vdata) {
-            SkDebugf("Could not allocate vertices\n");
-            return;
-        }
-
-        (this->*(kTessFnsAndVertexSizes[tessFnIdx].fTessFn))(vdata, gp.get());
-
-        GrPrimitiveType primitiveType =
-                fDraws.count() > 1 ? GrPrimitiveType::kTriangles : GrPrimitiveType::kTriangleStrip;
-        GrMesh* mesh = target->allocMesh(primitiveType);
-        if (fDraws.count() > 1) {
-            sk_sp<const GrBuffer> ibuffer = target->resourceProvider()->refQuadIndexBuffer();
-            if (!ibuffer) {
-                SkDebugf("Could not allocate quad indices\n");
+        GrMesh* meshes = target->allocMeshes(numOps);
+        int i = 0;
+        for (const auto& op : ChainRange<TextureOp>(this)) {
+            int vstart;
+            const GrBuffer* vbuffer;
+            void* vdata = target->makeVertexSpace(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize,
+                                                  4 * op.fDraws.count(), &vbuffer, &vstart);
+            if (!vdata) {
+                SkDebugf("Could not allocate vertices\n");
                 return;
             }
-            mesh->setIndexedPatterned(ibuffer.get(), 6, 4, fDraws.count(),
-                                      GrResourceProvider::QuadCountOfQuadBuffer());
-        } else {
-            mesh->setNonIndexedNonInstanced(4);
+
+            (op.*(kTessFnsAndVertexSizes[tessFnIdx].fTessFn))(vdata, gp.get());
+
+            if (op.fDraws.count() > 1) {
+                meshes[i].setPrimitiveType(GrPrimitiveType::kTriangles);
+                sk_sp<const GrBuffer> ibuffer = target->resourceProvider()->refQuadIndexBuffer();
+                if (!ibuffer) {
+                    SkDebugf("Could not allocate quad indices\n");
+                    return;
+                }
+                meshes[i].setIndexedPatterned(ibuffer.get(), 6, 4, op.fDraws.count(),
+                                              GrResourceProvider::QuadCountOfQuadBuffer());
+            } else {
+                meshes[i].setPrimitiveType(GrPrimitiveType::kTriangleStrip);
+                meshes[i].setNonIndexedNonInstanced(4);
+            }
+            meshes[i].setVertexData(vbuffer, vstart);
+            if (dynamicStateArrays) {
+                dynamicStateArrays->fPrimitiveProcessorTextures[i] = op.fProxy;
+            }
+            ++i;
         }
-        mesh->setVertexData(vbuffer, vstart);
-        target->draw(std::move(gp), pipeline, fixedDynamicState, mesh);
+        target->draw(std::move(gp), pipeline, fixedDynamicState, dynamicStateArrays, meshes,
+                     numOps);
     }
 
-    CombineResult onCombineIfPossible(GrOp* t, const GrCaps&) override {
+    CombineResult onCombineIfPossible(GrOp* t, const GrCaps& caps) override {
         const auto* that = t->cast<TextureOp>();
         if (!GrColorSpaceXform::Equals(fTextureColorSpaceXform.get(),
                                        that->fTextureColorSpaceXform.get())) {
@@ -721,7 +747,17 @@
         if (this->aaType() != that->aaType()) {
             return CombineResult::kCannotCombine;
         }
-        if (fProxy->uniqueID() != that->fProxy->uniqueID() || fFilter != that->fFilter) {
+        if (fFilter != that->fFilter) {
+            return CombineResult::kCannotCombine;
+        }
+        if (fProxy->uniqueID() != that->fProxy->uniqueID() || that->isChained()) {
+            // We can't merge across different proxies (and we're disallowed from merging when
+            // 'that' is chained). Check if we can be chained with 'that'.
+            if (fProxy->config() == that->fProxy->config() &&
+                fProxy->textureType() == that->fProxy->textureType() &&
+                caps.dynamicStateArrayGeometryProcessorTextureSupport()) {
+                return CombineResult::kMayChain;
+            }
             return CombineResult::kCannotCombine;
         }
         fDraws.push_back_n(that->fDraws.count(), that->fDraws.begin());
diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp
index 27c2937..6435c14 100644
--- a/src/gpu/vk/GrVkCaps.cpp
+++ b/src/gpu/vk/GrVkCaps.cpp
@@ -21,8 +21,8 @@
     : INHERITED(contextOptions) {
 
     /**************************************************************************
-    * GrDrawTargetCaps fields
-    **************************************************************************/
+     * GrCaps fields
+     **************************************************************************/
     fMipMapSupport = true;   // always available in Vulkan
     fSRGBSupport = true;   // always available in Vulkan
     fNPOTTextureTileSupport = true;  // always available in Vulkan
@@ -43,6 +43,8 @@
     fMaxRenderTargetSize = 4096; // minimum required by spec
     fMaxTextureSize = 4096; // minimum required by spec
 
+    fDynamicStateArrayGeometryProcessorTextureSupport = true;
+
     fShaderCaps.reset(new GrShaderCaps(contextOptions));
 
     this->init(contextOptions, vkInterface, physDev, features, extensions);
@@ -340,7 +342,6 @@
     if (kImagination_VkVendor == properties.vendorID) {
         fShaderCaps->fAtan2ImplementedAsAtanYOverX = true;
     }
-
 }
 
 int get_max_sample_count(VkSampleCountFlags flags) {
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index caff3ca..7b4a92d 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -243,7 +243,6 @@
                                                          dynamicOffsetCount,
                                                          dynamicOffsets));
     this->addRecordingResource(layout);
-    pipelineState->addUniformResources(*this);
 }
 
 void GrVkCommandBuffer::bindDescriptorSets(const GrVkGpu* gpu,
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index 44cf980..3ecb340 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -605,13 +605,20 @@
     }
     fLastPipelineState = pipelineState;
 
-    const GrTextureProxy* const* primProcProxies = nullptr;
-    if (fixedDynamicState) {
-        primProcProxies = fixedDynamicState->fPrimitiveProcessorTextures;
-    }
-    pipelineState->setData(fGpu, primProc, pipeline, primProcProxies);
+    pipelineState->bindPipeline(fGpu, cbInfo.currentCmdBuf());
 
-    pipelineState->bind(fGpu, cbInfo.currentCmdBuf());
+    pipelineState->setAndBindUniforms(fGpu, primProc, pipeline, cbInfo.currentCmdBuf());
+
+    // Check whether we need to bind textures between each GrMesh. If not we can bind them all now.
+    bool setTextures = !(dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures);
+    if (setTextures) {
+        const GrTextureProxy* const* primProcProxies = nullptr;
+        if (fixedDynamicState) {
+            primProcProxies = fixedDynamicState->fPrimitiveProcessorTextures;
+        }
+        pipelineState->setAndBindTextures(fGpu, primProc, pipeline, primProcProxies,
+                                          cbInfo.currentCmdBuf());
+    }
 
     GrRenderTarget* rt = pipeline.renderTarget();
 
@@ -666,9 +673,18 @@
         cbInfo.fSampledImages.push_back(vkTexture);
     };
 
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        auto texture = fixedDynamicState->fPrimitiveProcessorTextures[i]->peekTexture();
-        prepareSampledImage(texture, primProc.textureSampler(i).samplerState().filter());
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        for (int m = 0, i = 0; m < meshCount; ++m) {
+            for (int s = 0; s < primProc.numTextureSamplers(); ++s, ++i) {
+                auto texture = dynamicStateArrays->fPrimitiveProcessorTextures[i]->peekTexture();
+                prepareSampledImage(texture, primProc.textureSampler(s).samplerState().filter());
+            }
+        }
+    } else {
+        for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+            auto texture = fixedDynamicState->fPrimitiveProcessorTextures[i]->peekTexture();
+            prepareSampledImage(texture, primProc.textureSampler(i).samplerState().filter());
+        }
     }
     GrFragmentProcessor::Iter iter(pipeline);
     while (const GrFragmentProcessor* fp = iter.next()) {
@@ -690,14 +706,11 @@
 
     bool dynamicScissor =
             pipeline.isScissorEnabled() && dynamicStateArrays && dynamicStateArrays->fScissorRects;
+    bool dynamicTextures = dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures;
 
     for (int i = 0; i < meshCount; ++i) {
         const GrMesh& mesh = meshes[i];
         if (mesh.primitiveType() != primitiveType) {
-            // Technically we don't have to call this here (since there is a safety check in
-            // pipelineState:setData but this will allow for quicker freeing of resources if the
-            // pipelineState sits in a cache for a while.
-            pipelineState->freeTempResources(fGpu);
             SkDEBUGCODE(pipelineState = nullptr);
             primitiveType = mesh.primitiveType();
             pipelineState = this->prepareDrawState(primProc, pipeline, fixedDynamicState,
@@ -712,18 +725,18 @@
                                                      pipeline.proxy()->origin(),
                                                      dynamicStateArrays->fScissorRects[i]);
         }
-
+        if (dynamicTextures) {
+            GrTextureProxy* const* meshProxies = dynamicStateArrays->fPrimitiveProcessorTextures +
+                                                 primProc.numTextureSamplers() * i;
+            pipelineState->setAndBindTextures(fGpu, primProc, pipeline, meshProxies,
+                                              cbInfo.currentCmdBuf());
+        }
         SkASSERT(pipelineState);
         mesh.sendToGpu(this);
     }
 
     cbInfo.fBounds.join(bounds);
     cbInfo.fIsEmpty = false;
-
-    // Technically we don't have to call this here (since there is a safety check in
-    // pipelineState:setData but this will allow for quicker freeing of resources if the
-    // pipelineState sits in a cache for a while.
-    pipelineState->freeTempResources(fGpu);
 }
 
 void GrVkGpuRTCommandBuffer::sendInstancedMeshToGpu(GrPrimitiveType,
diff --git a/src/gpu/vk/GrVkPipelineState.cpp b/src/gpu/vk/GrVkPipelineState.cpp
index c57fa1b..694c846 100644
--- a/src/gpu/vk/GrVkPipelineState.cpp
+++ b/src/gpu/vk/GrVkPipelineState.cpp
@@ -53,10 +53,6 @@
         , fFragmentProcessors(std::move(fragmentProcessors))
         , fFragmentProcessorCnt(fragmentProcessorCnt)
         , fDataManager(uniforms, geometryUniformSize, fragmentUniformSize) {
-    fSamplers.setReserve(numSamplers);
-    fTextureViews.setReserve(numSamplers);
-    fTextures.setReserve(numSamplers);
-
     fDescriptorSets[0] = VK_NULL_HANDLE;
     fDescriptorSets[1] = VK_NULL_HANDLE;
     fDescriptorSets[2] = VK_NULL_HANDLE;
@@ -71,26 +67,6 @@
     // Must have freed all GPU resources before this is destroyed
     SkASSERT(!fPipeline);
     SkASSERT(!fPipelineLayout);
-    SkASSERT(!fSamplers.count());
-    SkASSERT(!fTextureViews.count());
-    SkASSERT(!fTextures.count());
-}
-
-void GrVkPipelineState::freeTempResources(const GrVkGpu* gpu) {
-    for (int i = 0; i < fSamplers.count(); ++i) {
-        fSamplers[i]->unref(gpu);
-    }
-    fSamplers.rewind();
-
-    for (int i = 0; i < fTextureViews.count(); ++i) {
-        fTextureViews[i]->unref(gpu);
-    }
-    fTextureViews.rewind();
-
-    for (int i = 0; i < fTextures.count(); ++i) {
-        fTextures[i]->unref(gpu);
-    }
-    fTextures.rewind();
 }
 
 void GrVkPipelineState::freeGPUResources(const GrVkGpu* gpu) {
@@ -123,8 +99,6 @@
         fSamplerDescriptorSet->recycle(const_cast<GrVkGpu*>(gpu));
         fSamplerDescriptorSet = nullptr;
     }
-
-    this->freeTempResources(gpu);
 }
 
 void GrVkPipelineState::abandonGPUResources() {
@@ -148,21 +122,6 @@
         fFragmentUniformBuffer.reset();
     }
 
-    for (int i = 0; i < fSamplers.count(); ++i) {
-        fSamplers[i]->unrefAndAbandon();
-    }
-    fSamplers.rewind();
-
-    for (int i = 0; i < fTextureViews.count(); ++i) {
-        fTextureViews[i]->unrefAndAbandon();
-    }
-    fTextureViews.rewind();
-
-    for (int i = 0; i < fTextures.count(); ++i) {
-        fTextures[i]->unrefAndAbandon();
-    }
-    fTextures.rewind();
-
     if (fUniformDescriptorSet) {
         fUniformDescriptorSet->unrefAndAbandon();
         fUniformDescriptorSet = nullptr;
@@ -174,17 +133,70 @@
     }
 }
 
-void GrVkPipelineState::setData(GrVkGpu* gpu,
-                                const GrPrimitiveProcessor& primProc,
-                                const GrPipeline& pipeline,
-                                const GrTextureProxy* const primProcTextures[]) {
-    SkASSERT(primProcTextures || !primProc.numTextureSamplers());
-    // This is here to protect against someone calling setData multiple times in a row without
-    // freeing the tempData between calls.
-    this->freeTempResources(gpu);
-
+void GrVkPipelineState::setAndBindUniforms(GrVkGpu* gpu,
+                                           const GrPrimitiveProcessor& primProc,
+                                           const GrPipeline& pipeline,
+                                           GrVkCommandBuffer* commandBuffer) {
     this->setRenderTargetState(pipeline.proxy());
 
+    fGeometryProcessor->setData(fDataManager, primProc,
+                                GrFragmentProcessor::CoordTransformIter(pipeline));
+    GrFragmentProcessor::Iter iter(pipeline);
+    GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
+    const GrFragmentProcessor* fp = iter.next();
+    GrGLSLFragmentProcessor* glslFP = glslIter.next();
+    while (fp && glslFP) {
+        glslFP->setData(fDataManager, *fp);
+        fp = iter.next();
+        glslFP = glslIter.next();
+    }
+    SkASSERT(!fp && !glslFP);
+
+    {
+        SkIPoint offset;
+        GrTexture* dstTexture = pipeline.peekDstTexture(&offset);
+
+        fXferProcessor->setData(fDataManager, pipeline.getXferProcessor(), dstTexture, offset);
+    }
+
+    // Get new descriptor set
+    if (fGeometryUniformBuffer || fFragmentUniformBuffer) {
+        int uniformDSIdx = GrVkUniformHandler::kUniformBufferDescSet;
+        if (fDataManager.uploadUniformBuffers(
+                    gpu, fGeometryUniformBuffer.get(), fFragmentUniformBuffer.get()) ||
+            !fUniformDescriptorSet) {
+            if (fUniformDescriptorSet) {
+                fUniformDescriptorSet->recycle(gpu);
+            }
+            fUniformDescriptorSet = gpu->resourceProvider().getUniformDescriptorSet();
+            fDescriptorSets[uniformDSIdx] = fUniformDescriptorSet->descriptorSet();
+            this->writeUniformBuffers(gpu);
+        }
+        commandBuffer->bindDescriptorSets(gpu, this, fPipelineLayout, uniformDSIdx, 1,
+                                          &fDescriptorSets[uniformDSIdx], 0, nullptr);
+        if (fUniformDescriptorSet) {
+            commandBuffer->addRecycledResource(fUniformDescriptorSet);
+        }
+        if (fGeometryUniformBuffer) {
+            commandBuffer->addRecycledResource(fGeometryUniformBuffer->resource());
+        }
+        if (fFragmentUniformBuffer) {
+            commandBuffer->addRecycledResource(fFragmentUniformBuffer->resource());
+        }
+    }
+}
+
+void GrVkPipelineState::setAndBindTextures(GrVkGpu* gpu,
+                                           const GrPrimitiveProcessor& primProc,
+                                           const GrPipeline& pipeline,
+                                           const GrTextureProxy* const primProcTextures[],
+                                           GrVkCommandBuffer* commandBuffer) {
+    SkASSERT(primProcTextures || !primProc.numTextureSamplers());
+
+    struct SamplerBindings {
+        GrSamplerState fState;
+        GrVkTexture* fTexture;
+    };
     SkAutoSTMalloc<8, SamplerBindings> samplerBindings(fNumSamplers);
     int currTextureBinding = 0;
 
@@ -201,7 +213,6 @@
     const GrFragmentProcessor* fp = iter.next();
     GrGLSLFragmentProcessor* glslFP = glslIter.next();
     while (fp && glslFP) {
-        glslFP->setData(fDataManager, *fp);
         for (int i = 0; i < fp->numTextureSamplers(); ++i) {
             const auto& sampler = fp->textureSampler(i);
             samplerBindings[currTextureBinding++] =
@@ -212,20 +223,13 @@
     }
     SkASSERT(!fp && !glslFP);
 
-    {
-        SkIPoint offset;
-        GrTexture* dstTexture = pipeline.peekDstTexture(&offset);
-
-        fXferProcessor->setData(fDataManager, pipeline.getXferProcessor(), dstTexture, offset);
-    }
-
     if (GrTextureProxy* dstTextureProxy = pipeline.dstTextureProxy()) {
         samplerBindings[currTextureBinding++] = {
                 GrSamplerState::ClampNearest(),
                 static_cast<GrVkTexture*>(dstTextureProxy->peekTexture())};
     }
 
-    // Get new descriptor sets
+    // Get new descriptor set
     SkASSERT(fNumSamplers == currTextureBinding);
     if (fNumSamplers) {
         if (fSamplerDescriptorSet) {
@@ -234,23 +238,44 @@
         fSamplerDescriptorSet = gpu->resourceProvider().getSamplerDescriptorSet(fSamplerDSHandle);
         int samplerDSIdx = GrVkUniformHandler::kSamplerDescSet;
         fDescriptorSets[samplerDSIdx] = fSamplerDescriptorSet->descriptorSet();
-        this->writeSamplers(gpu, samplerBindings.get());
-    }
+        for (int i = 0; i < fNumSamplers; ++i) {
+            const GrSamplerState& state = samplerBindings[i].fState;
+            GrVkTexture* texture = samplerBindings[i].fTexture;
 
-    if (fGeometryUniformBuffer || fFragmentUniformBuffer) {
-        if (fDataManager.uploadUniformBuffers(gpu,
-                                              fGeometryUniformBuffer.get(),
-                                              fFragmentUniformBuffer.get())
-            || !fUniformDescriptorSet)
-        {
-            if (fUniformDescriptorSet) {
-                fUniformDescriptorSet->recycle(gpu);
-            }
-            fUniformDescriptorSet = gpu->resourceProvider().getUniformDescriptorSet();
-            int uniformDSIdx = GrVkUniformHandler::kUniformBufferDescSet;
-            fDescriptorSets[uniformDSIdx] = fUniformDescriptorSet->descriptorSet();
-            this->writeUniformBuffers(gpu);
+            const GrVkImageView* textureView = texture->textureView();
+            GrVkSampler* sampler = gpu->resourceProvider().findOrCreateCompatibleSampler(
+                    state, texture->texturePriv().maxMipMapLevel());
+
+            VkDescriptorImageInfo imageInfo;
+            memset(&imageInfo, 0, sizeof(VkDescriptorImageInfo));
+            imageInfo.sampler = sampler->sampler();
+            imageInfo.imageView = textureView->imageView();
+            imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+            VkWriteDescriptorSet writeInfo;
+            memset(&writeInfo, 0, sizeof(VkWriteDescriptorSet));
+            writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+            writeInfo.pNext = nullptr;
+            writeInfo.dstSet = fDescriptorSets[GrVkUniformHandler::kSamplerDescSet];
+            writeInfo.dstBinding = i;
+            writeInfo.dstArrayElement = 0;
+            writeInfo.descriptorCount = 1;
+            writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            writeInfo.pImageInfo = &imageInfo;
+            writeInfo.pBufferInfo = nullptr;
+            writeInfo.pTexelBufferView = nullptr;
+
+            GR_VK_CALL(gpu->vkInterface(),
+                       UpdateDescriptorSets(gpu->device(), 1, &writeInfo, 0, nullptr));
+            commandBuffer->addResource(sampler);
+            sampler->unref(gpu);
+            commandBuffer->addResource(samplerBindings[i].fTexture->textureView());
+            commandBuffer->addResource(samplerBindings[i].fTexture->resource());
         }
+
+        commandBuffer->bindDescriptorSets(gpu, this, fPipelineLayout, samplerDSIdx, 1,
+                                          &fDescriptorSets[samplerDSIdx], 0, nullptr);
+        commandBuffer->addRecycledResource(fSamplerDescriptorSet);
     }
 }
 
@@ -312,49 +337,6 @@
     }
 }
 
-void GrVkPipelineState::writeSamplers(GrVkGpu* gpu, const SamplerBindings bindings[]) {
-    for (int i = 0; i < fNumSamplers; ++i) {
-        const GrSamplerState& state = bindings[i].fState;
-        GrVkTexture* texture = bindings[i].fTexture;
-
-        fSamplers.push_back(gpu->resourceProvider().findOrCreateCompatibleSampler(
-                state, texture->texturePriv().maxMipMapLevel()));
-
-        const GrVkResource* textureResource = texture->resource();
-        textureResource->ref();
-        fTextures.push_back(textureResource);
-
-        const GrVkImageView* textureView = texture->textureView();
-        textureView->ref();
-        fTextureViews.push_back(textureView);
-
-        VkDescriptorImageInfo imageInfo;
-        memset(&imageInfo, 0, sizeof(VkDescriptorImageInfo));
-        imageInfo.sampler = fSamplers[i]->sampler();
-        imageInfo.imageView = textureView->imageView();
-        imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-
-        VkWriteDescriptorSet writeInfo;
-        memset(&writeInfo, 0, sizeof(VkWriteDescriptorSet));
-        writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-        writeInfo.pNext = nullptr;
-        writeInfo.dstSet = fDescriptorSets[GrVkUniformHandler::kSamplerDescSet];
-        writeInfo.dstBinding = i;
-        writeInfo.dstArrayElement = 0;
-        writeInfo.descriptorCount = 1;
-        writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-        writeInfo.pImageInfo = &imageInfo;
-        writeInfo.pBufferInfo = nullptr;
-        writeInfo.pTexelBufferView = nullptr;
-
-        GR_VK_CALL(gpu->vkInterface(), UpdateDescriptorSets(gpu->device(),
-                                                            1,
-                                                            &writeInfo,
-                                                            0,
-                                                            nullptr));
-    }
-}
-
 void GrVkPipelineState::setRenderTargetState(const GrRenderTargetProxy* proxy) {
     GrRenderTarget* rt = proxy->peekRenderTarget();
 
@@ -379,47 +361,6 @@
     }
 }
 
-void GrVkPipelineState::bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer) {
+void GrVkPipelineState::bindPipeline(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer) {
     commandBuffer->bindPipeline(gpu, fPipeline);
-
-    if (fGeometryUniformBuffer || fFragmentUniformBuffer) {
-        int dsIndex = GrVkUniformHandler::kUniformBufferDescSet;
-        commandBuffer->bindDescriptorSets(gpu, this, fPipelineLayout,
-                                          dsIndex, 1,
-                                          &fDescriptorSets[dsIndex], 0, nullptr);
-    }
-    if (fNumSamplers) {
-        int dsIndex = GrVkUniformHandler::kSamplerDescSet;
-        commandBuffer->bindDescriptorSets(gpu, this, fPipelineLayout,
-                                          dsIndex, 1,
-                                          &fDescriptorSets[dsIndex], 0, nullptr);
-    }
-}
-
-void GrVkPipelineState::addUniformResources(GrVkCommandBuffer& commandBuffer) {
-    if (fUniformDescriptorSet) {
-        commandBuffer.addRecycledResource(fUniformDescriptorSet);
-    }
-    if (fSamplerDescriptorSet) {
-        commandBuffer.addRecycledResource(fSamplerDescriptorSet);
-    }
-
-    if (fGeometryUniformBuffer.get()) {
-        commandBuffer.addRecycledResource(fGeometryUniformBuffer->resource());
-    }
-    if (fFragmentUniformBuffer.get()) {
-        commandBuffer.addRecycledResource(fFragmentUniformBuffer->resource());
-    }
-
-    for (int i = 0; i < fSamplers.count(); ++i) {
-        commandBuffer.addResource(fSamplers[i]);
-    }
-
-    for (int i = 0; i < fTextureViews.count(); ++i) {
-        commandBuffer.addResource(fTextureViews[i]);
-    }
-
-    for (int i = 0; i < fTextures.count(); ++i) {
-        commandBuffer.addResource(fTextures[i]);
-    }
 }
diff --git a/src/gpu/vk/GrVkPipelineState.h b/src/gpu/vk/GrVkPipelineState.h
index d61359c..994d0a9 100644
--- a/src/gpu/vk/GrVkPipelineState.h
+++ b/src/gpu/vk/GrVkPipelineState.h
@@ -56,35 +56,32 @@
 
     ~GrVkPipelineState();
 
-    void setData(GrVkGpu*, const GrPrimitiveProcessor&, const GrPipeline&,
-                 const GrTextureProxy* const primitiveProcessorTextures[]);
+    void setAndBindUniforms(GrVkGpu*, const GrPrimitiveProcessor&, const GrPipeline&,
+                            GrVkCommandBuffer*);
+    /**
+     * This must be called after setAndBindUniforms() since that function invalidates texture
+     * bindings.
+     */
+    void setAndBindTextures(GrVkGpu*, const GrPrimitiveProcessor&, const GrPipeline&,
+                            const GrTextureProxy* const primitiveProcessorTextures[],
+                            GrVkCommandBuffer*);
 
-    void bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer);
+    void bindPipeline(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer);
 
-    void addUniformResources(GrVkCommandBuffer&);
+    void addUniformResources(GrVkCommandBuffer&, GrVkSampler*[], GrVkTexture*[], int numTextures);
 
     void freeGPUResources(const GrVkGpu* gpu);
 
-    // This releases resources that only a given instance of a GrVkPipelineState needs to hold onto
-    // and don't need to survive across new uses of the GrVkPipelineState.
-    void freeTempResources(const GrVkGpu* gpu);
-
     void abandonGPUResources();
 
 private:
     void writeUniformBuffers(const GrVkGpu* gpu);
 
-    struct SamplerBindings {
-        GrSamplerState fState;
-        GrVkTexture* fTexture;
-    };
-    void writeSamplers(GrVkGpu* gpu, const SamplerBindings[]);
-
     /**
-    * We use the RT's size and origin to adjust from Skia device space to vulkan normalized device
-    * space and to make device space positions have the correct origin for processors that require
-    * them.
-    */
+     * We use the RT's size and origin to adjust from Skia device space to vulkan normalized device
+     * space and to make device space positions have the correct origin for processors that require
+     * them.
+     */
     struct RenderTargetState {
         SkISize         fRenderTargetSize;
         GrSurfaceOrigin fRenderTargetOrigin;
@@ -141,11 +138,6 @@
     std::unique_ptr<GrVkUniformBuffer> fGeometryUniformBuffer;
     std::unique_ptr<GrVkUniformBuffer> fFragmentUniformBuffer;
 
-    // GrVkResources used for sampling textures
-    SkTDArray<GrVkSampler*> fSamplers;
-    SkTDArray<const GrVkImageView*> fTextureViews;
-    SkTDArray<const GrVkResource*> fTextures;
-
     // Tracks the current render target uniforms stored in the vertex buffer.
     RenderTargetState fRenderTargetState;
     GrGLSLBuiltinUniformHandles fBuiltinUniformHandles;
diff --git a/src/gpu/vk/GrVkUniformHandler.h b/src/gpu/vk/GrVkUniformHandler.h
index bd8f8f3..d79b1b3 100644
--- a/src/gpu/vk/GrVkUniformHandler.h
+++ b/src/gpu/vk/GrVkUniformHandler.h
@@ -17,6 +17,12 @@
     static const int kUniformsPerBlock = 8;
 
     enum {
+        /**
+         * Binding a descriptor set invalidates all higher index descriptor sets. We must bind
+         * in the order of this enumeration. Samplers are after Uniforms because GrOps can specify
+         * GP textures as dynamic state, meaning they get rebound for each GrMesh in a draw while
+         * uniforms are bound once before all the draws.
+         */
         kUniformBufferDescSet = 0,
         kSamplerDescSet = 1,
     };