Reland "Add ability to specify different GP textures for each mesh in a draw."

This reverts commit deeb655912b90d9b19d692f4b6ebe55ff4ce90cb.

Reason for revert: Fix landed for incorrect chaining logic: https://skia-review.googlesource.com/148380

Original change's description:
> Revert "Add ability to specify different GP textures for each mesh in a draw."
> 
> This reverts commit d1b8a166db27ffb8e550c4e853afbd67025948bf.
> 
> Reason for revert: breaks android apps, by drawing overlapping content out of painters order.
> 
> Original change's description:
> > Add ability to specify different GP textures for each mesh in a draw.
> > 
> > Uses GrPipeline::DynamicStateArrays to allow per-mesh GP textures when
> > drawing an array of GrMeshes.
> > 
> > Uses this along with op-chaining to make drawing multiple TextureOps
> > with different textures faster.
> > 
> > Change-Id: Iec4da1b72a13d0e0c94c8a8568fe4221c539dfcf
> > Reviewed-on: https://skia-review.googlesource.com/145960
> > Commit-Queue: Brian Salomon <bsalomon@google.com>
> > Reviewed-by: Brian Osman <brianosman@google.com>
> 
> TBR=bsalomon@google.com,robertphillips@google.com,brianosman@google.com
> 
> # Not skipping CQ checks because original CL landed > 1 day ago.
> 
> Change-Id: I5c686b85adb378ba7faf34576efce74aebd348f7
> Reviewed-on: https://skia-review.googlesource.com/147260
> Reviewed-by: Derek Sollenberger <djsollen@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
> Commit-Queue: Brian Osman <brianosman@google.com>

TBR=djsollen@google.com,bsalomon@google.com,robertphillips@google.com,brianosman@google.com

# Not skipping CQ checks because original CL landed > 1 day ago.

Change-Id: I90173d4072c64b9ec4c87989e63e4ed283bd4829
Reviewed-on: https://skia-review.googlesource.com/148681
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp
index c700335..d700600 100644
--- a/src/gpu/GrCaps.cpp
+++ b/src/gpu/GrCaps.cpp
@@ -61,6 +61,7 @@
     fSampleShadingSupport = false;
     fFenceSyncSupport = false;
     fCrossContextTextureSupport = false;
+    fDynamicStateArrayGeometryProcessorTextureSupport = false;
 
     fBlendEquationSupport = kBasic_BlendEquationSupport;
     fAdvBlendEqBlacklist = 0;
@@ -172,6 +173,8 @@
     writer->appendBool("Sample shading support", fSampleShadingSupport);
     writer->appendBool("Fence sync support", fFenceSyncSupport);
     writer->appendBool("Cross context texture support", fCrossContextTextureSupport);
+    writer->appendBool("Specify GeometryProcessor textures as a dynamic state array",
+                       fDynamicStateArrayGeometryProcessorTextureSupport);
 
     writer->appendBool("Blacklist Coverage Counting Path Renderer [workaround]",
                        fBlacklistCoverageCounting);
diff --git a/src/gpu/GrCaps.h b/src/gpu/GrCaps.h
index 9e18c38..3dadb1e 100644
--- a/src/gpu/GrCaps.h
+++ b/src/gpu/GrCaps.h
@@ -234,13 +234,16 @@
 
     bool fenceSyncSupport() const { return fFenceSyncSupport; }
     bool crossContextTextureSupport() const { return fCrossContextTextureSupport; }
-
     /**
      * Returns whether or not we will be able to do a copy given the passed in params
      */
     virtual bool canCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
                                 const SkIRect& srcRect, const SkIPoint& dstPoint) const = 0;
 
+    bool dynamicStateArrayGeometryProcessorTextureSupport() const {
+        return fDynamicStateArrayGeometryProcessorTextureSupport;
+    }
+
     /**
      * This is can be called before allocating a texture to be a dst for copySurface. This is only
      * used for doing dst copies needed in blends, thus the src is always a GrRenderTargetProxy. It
@@ -327,9 +330,12 @@
     // TODO: this may need to be an enum to support different fence types
     bool fFenceSyncSupport                           : 1;
 
-    // Vulkan doesn't support this (yet) and some drivers have issues, too
+    // Requires fence sync support in GL.
     bool fCrossContextTextureSupport                 : 1;
 
+    // Not (yet) implemented in VK backend.
+    bool fDynamicStateArrayGeometryProcessorTextureSupport : 1;
+
     BlendEquationSupport fBlendEquationSupport;
     uint32_t fAdvBlendEqBlacklist;
     GR_STATIC_ASSERT(kLast_GrBlendEquation < 32);
diff --git a/src/gpu/GrGpuCommandBuffer.cpp b/src/gpu/GrGpuCommandBuffer.cpp
index d66f24c..421d057 100644
--- a/src/gpu/GrGpuCommandBuffer.cpp
+++ b/src/gpu/GrGpuCommandBuffer.cpp
@@ -47,9 +47,20 @@
     if (pipeline.isBad()) {
         return false;
     }
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        if (!fixedDynamicState->fPrimitiveProcessorTextures[i]->instantiate(resourceProvider)) {
-            return false;
+    if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+            if (!fixedDynamicState->fPrimitiveProcessorTextures[i]->instantiate(resourceProvider)) {
+                return false;
+            }
+        }
+    }
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = primProc.numTextureSamplers() * meshCount;
+        const auto* textures = dynamicStateArrays->fPrimitiveProcessorTextures;
+        for (int i = 0; i < n; ++i) {
+            if (!textures[i]->instantiate(resourceProvider)) {
+                return false;
+            }
         }
     }
 
diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp
index 6385c9f..a1d9229 100644
--- a/src/gpu/GrOpFlushState.cpp
+++ b/src/gpu/GrOpFlushState.cpp
@@ -102,19 +102,28 @@
 
 void GrOpFlushState::draw(sk_sp<const GrGeometryProcessor> gp, const GrPipeline* pipeline,
                           const GrPipeline::FixedDynamicState* fixedDynamicState,
+                          const GrPipeline::DynamicStateArrays* dynamicStateArrays,
                           const GrMesh meshes[], int meshCnt) {
     SkASSERT(fOpArgs);
     SkASSERT(fOpArgs->fOp);
     bool firstDraw = fDraws.begin() == fDraws.end();
     auto& draw = fDraws.append(&fArena);
     GrDeferredUploadToken token = fTokenTracker->issueDrawToken();
-    for (int i = 0; i < gp->numTextureSamplers(); ++i) {
-        fixedDynamicState->fPrimitiveProcessorTextures[i]->addPendingRead();
+    if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < gp->numTextureSamplers(); ++i) {
+            fixedDynamicState->fPrimitiveProcessorTextures[i]->addPendingRead();
+        }
+    }
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = gp->numTextureSamplers() * meshCnt;
+        for (int i = 0; i < n; ++i) {
+            dynamicStateArrays->fPrimitiveProcessorTextures[i]->addPendingRead();
+        }
     }
     draw.fGeometryProcessor = std::move(gp);
     draw.fPipeline = pipeline;
     draw.fFixedDynamicState = fixedDynamicState;
-    draw.fDynamicStateArrays = nullptr;
+    draw.fDynamicStateArrays = dynamicStateArrays;
     draw.fMeshes = meshes;
     draw.fMeshCnt = meshCnt;
     draw.fOpID = fOpArgs->fOp->uniqueID();
@@ -165,3 +174,20 @@
 GrAtlasManager* GrOpFlushState::atlasManager() const {
     return fGpu->getContext()->contextPriv().getAtlasManager();
 }
+
+//////////////////////////////////////////////////////////////////////////////
+
+GrOpFlushState::Draw::~Draw() {
+    if (fFixedDynamicState && fFixedDynamicState->fPrimitiveProcessorTextures) {
+        for (int i = 0; i < fGeometryProcessor->numTextureSamplers(); ++i) {
+            fFixedDynamicState->fPrimitiveProcessorTextures[i]->completedRead();
+        }
+    }
+    if (fDynamicStateArrays && fDynamicStateArrays->fPrimitiveProcessorTextures) {
+        int n = fGeometryProcessor->numTextureSamplers() * fMeshCnt;
+        const auto* textures = fDynamicStateArrays->fPrimitiveProcessorTextures;
+        for (int i = 0; i < n; ++i) {
+            textures[i]->completedRead();
+        }
+    }
+}
diff --git a/src/gpu/GrOpFlushState.h b/src/gpu/GrOpFlushState.h
index 0a46ed2..4746aa2 100644
--- a/src/gpu/GrOpFlushState.h
+++ b/src/gpu/GrOpFlushState.h
@@ -76,8 +76,9 @@
     void draw(sk_sp<const GrGeometryProcessor>,
               const GrPipeline*,
               const GrPipeline::FixedDynamicState*,
+              const GrPipeline::DynamicStateArrays*,
               const GrMesh[],
-              int meshCount) final;
+              int meshCnt) final;
     void* makeVertexSpace(size_t vertexSize, int vertexCount, const GrBuffer**,
                           int* startVertex) final;
     uint16_t* makeIndexSpace(int indexCount, const GrBuffer**, int* startIndex) final;
@@ -120,11 +121,7 @@
     // that share a geometry processor into a Draw is that it allows the Gpu object to setup
     // the shared state once and then issue draws for each mesh.
     struct Draw {
-        ~Draw() {
-            for (int i = 0; i < fGeometryProcessor->numTextureSamplers(); ++i) {
-                fFixedDynamicState->fPrimitiveProcessorTextures[i]->completedRead();
-            }
-        }
+        ~Draw();
         sk_sp<const GrGeometryProcessor> fGeometryProcessor;
         const GrPipeline* fPipeline = nullptr;
         const GrPipeline::FixedDynamicState* fFixedDynamicState;
diff --git a/src/gpu/GrPipeline.h b/src/gpu/GrPipeline.h
index 5817ce8..5e75c65 100644
--- a/src/gpu/GrPipeline.h
+++ b/src/gpu/GrPipeline.h
@@ -73,7 +73,8 @@
         explicit FixedDynamicState(const SkIRect& scissorRect) : fScissorRect(scissorRect) {}
         FixedDynamicState() = default;
         SkIRect fScissorRect = SkIRect::EmptyIRect();
-        // Must have GrPrimitiveProcessor::numTextureSamplers() entries. Can be null if no samplers.
+        // Must have GrPrimitiveProcessor::numTextureSamplers() entries. Can be null if there are
+        // no samplers or if the textures are passed using DynamicStateArrays instead.
         GrTextureProxy** fPrimitiveProcessorTextures = nullptr;
     };
 
@@ -83,6 +84,10 @@
      */
     struct DynamicStateArrays {
         const SkIRect* fScissorRects = nullptr;
+        // Must have GrPrimitiveProcessor::numTextureSamplers() * num_meshes entries.
+        // Can be null if there are no samplers, or if the same textures are used for all meshes
+        // via FixedDynamicState.
+        GrTextureProxy** fPrimitiveProcessorTextures = nullptr;
     };
 
     /**
diff --git a/src/gpu/GrRenderTargetOpList.h b/src/gpu/GrRenderTargetOpList.h
index c15c054..e74a25c 100644
--- a/src/gpu/GrRenderTargetOpList.h
+++ b/src/gpu/GrRenderTargetOpList.h
@@ -179,7 +179,7 @@
     int                            fLastClipNumAnalyticFPs;
 
     // For ops/opList we have mean: 5 stdDev: 28
-    SkSTArray<5, RecordedOp, true> fRecordedOps;
+    SkSTArray<25, RecordedOp, true> fRecordedOps;
 
     // MDB TODO: 4096 for the first allocation of the clip space will be huge overkill.
     // Gather statistics to determine the correct size.
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index 2051c61..5ccbae1 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -571,6 +571,8 @@
     // Safely moving textures between contexts requires fences.
     fCrossContextTextureSupport = fFenceSyncSupport;
 
+    fDynamicStateArrayGeometryProcessorTextureSupport = true;
+
     if (kGL_GrGLStandard == standard) {
         if (version >= GR_GL_VER(4, 1)) {
             fProgramBinarySupport = true;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 9129344..74fbedd 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -1674,9 +1674,11 @@
     }
 }
 
-void GrGLGpu::generateMipmapsForProcessorTextures(const GrPrimitiveProcessor& primProc,
-                                                  const GrPipeline& pipeline,
-                                                  const GrTextureProxy* const primProcTextures[]) {
+void GrGLGpu::resolveAndGenerateMipMapsForProcessorTextures(
+        const GrPrimitiveProcessor& primProc,
+        const GrPipeline& pipeline,
+        const GrTextureProxy* const primProcTextures[],
+        int numPrimitiveProcessorTextureSets) {
     auto genLevelsIfNeeded = [this](GrTexture* tex, const GrSamplerState& sampler) {
         SkASSERT(tex);
         if (sampler.filter() == GrSamplerState::Filter::kMipMap &&
@@ -1684,12 +1686,19 @@
             tex->texturePriv().mipMapsAreDirty()) {
             SkASSERT(this->caps()->mipMapSupport());
             this->regenerateMipMapLevels(static_cast<GrGLTexture*>(tex));
+            SkASSERT(!tex->asRenderTarget() || !tex->asRenderTarget()->needsResolve());
+        } else if (auto* rt = tex->asRenderTarget()) {
+            if (rt->needsResolve()) {
+                this->resolveRenderTarget(rt);
+            }
         }
     };
 
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        GrTexture* tex = primProcTextures[i]->peekTexture();
-        genLevelsIfNeeded(tex, primProc.textureSampler(i).samplerState());
+    for (int set = 0, tex = 0; set < numPrimitiveProcessorTextureSets; ++set) {
+        for (int sampler = 0; sampler < primProc.numTextureSamplers(); ++sampler, ++tex) {
+            GrTexture* texture = primProcTextures[tex]->peekTexture();
+            genLevelsIfNeeded(texture, primProc.textureSampler(sampler).samplerState());
+        }
     }
 
     GrFragmentProcessor::Iter iter(pipeline);
@@ -1704,17 +1713,26 @@
 bool GrGLGpu::flushGLState(const GrPrimitiveProcessor& primProc,
                            const GrPipeline& pipeline,
                            const GrPipeline::FixedDynamicState* fixedDynamicState,
+                           const GrPipeline::DynamicStateArrays* dynamicStateArrays,
+                           int dynamicStateArraysLength,
                            bool willDrawPoints) {
     sk_sp<GrGLProgram> program(fProgramCache->refProgram(this, primProc, pipeline, willDrawPoints));
     if (!program) {
         GrCapsDebugf(this->caps(), "Failed to create program!\n");
         return false;
     }
-    const GrTextureProxy* const* primProcProxies = nullptr;
-    if (fixedDynamicState) {
-        primProcProxies = fixedDynamicState->fPrimitiveProcessorTextures;
+    const GrTextureProxy* const* primProcProxiesForMipRegen = nullptr;
+    const GrTextureProxy* const* primProcProxiesToBind = nullptr;
+    int numPrimProcTextureSets = 1;  // number of textures per prim proc sampler.
+    if (dynamicStateArrays && dynamicStateArrays->fPrimitiveProcessorTextures) {
+        primProcProxiesForMipRegen = dynamicStateArrays->fPrimitiveProcessorTextures;
+        numPrimProcTextureSets = dynamicStateArraysLength;
+    } else if (fixedDynamicState && fixedDynamicState->fPrimitiveProcessorTextures) {
+        primProcProxiesForMipRegen = fixedDynamicState->fPrimitiveProcessorTextures;
+        primProcProxiesToBind = fixedDynamicState->fPrimitiveProcessorTextures;
     }
-    this->generateMipmapsForProcessorTextures(primProc, pipeline, primProcProxies);
+    this->resolveAndGenerateMipMapsForProcessorTextures(
+            primProc, pipeline, primProcProxiesForMipRegen, numPrimProcTextureSets);
 
     GrXferProcessor::BlendInfo blendInfo;
     pipeline.getXferProcessor().getBlendInfo(&blendInfo);
@@ -1731,7 +1749,7 @@
         this->flushBlend(blendInfo, swizzle);
     }
 
-    fHWProgram->updateUniformsAndTextureBindings(primProc, pipeline, primProcProxies);
+    fHWProgram->updateUniformsAndTextureBindings(primProc, pipeline, primProcProxiesToBind);
 
     GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(pipeline.renderTarget());
     GrStencilSettings stencil;
@@ -2254,30 +2272,40 @@
             break;
         }
     }
-    if (!this->flushGLState(primProc, pipeline, fixedDynamicState, hasPoints)) {
+    if (!this->flushGLState(primProc, pipeline, fixedDynamicState, dynamicStateArrays, meshCount,
+                            hasPoints)) {
         return;
     }
 
-    bool dynamicScissor =
-            pipeline.isScissorEnabled() && dynamicStateArrays && dynamicStateArrays->fScissorRects;
-    for (int i = 0; i < meshCount; ++i) {
+    bool dynamicScissor = false;
+    bool dynamicPrimProcTextures = false;
+    if (dynamicStateArrays) {
+        dynamicScissor = pipeline.isScissorEnabled() && dynamicStateArrays->fScissorRects;
+        dynamicPrimProcTextures = dynamicStateArrays->fPrimitiveProcessorTextures;
+    }
+    for (int m = 0; m < meshCount; ++m) {
         if (GrXferBarrierType barrierType = pipeline.xferBarrierType(*this->caps())) {
             this->xferBarrier(pipeline.renderTarget(), barrierType);
         }
 
         if (dynamicScissor) {
             GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(pipeline.renderTarget());
-            this->flushScissor(GrScissorState(dynamicStateArrays->fScissorRects[i]),
+            this->flushScissor(GrScissorState(dynamicStateArrays->fScissorRects[m]),
                                glRT->getViewport(), pipeline.proxy()->origin());
         }
+        if (dynamicPrimProcTextures) {
+            auto texProxyArray = dynamicStateArrays->fPrimitiveProcessorTextures +
+                                 m * primProc.numTextureSamplers();
+            fHWProgram->updatePrimitiveProcessorTextureBindings(primProc, texProxyArray);
+        }
         if (this->glCaps().requiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines() &&
-            GrIsPrimTypeLines(meshes[i].primitiveType()) &&
+            GrIsPrimTypeLines(meshes[m].primitiveType()) &&
             !GrIsPrimTypeLines(fLastPrimitiveType)) {
             GL_CALL(Enable(GR_GL_CULL_FACE));
             GL_CALL(Disable(GR_GL_CULL_FACE));
         }
-        meshes[i].sendToGpu(this);
-        fLastPrimitiveType = meshes[i].primitiveType();
+        meshes[m].sendToGpu(this);
+        fLastPrimitiveType = meshes[m].primitiveType();
     }
 
 #if SWAP_PER_DRAW
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 6b01d65..295ca42 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -251,14 +251,22 @@
 
     void setTextureSwizzle(int unitIdx, GrGLenum target, const GrGLenum swizzle[]);
 
-    void generateMipmapsForProcessorTextures(
+    /**
+     * primitiveProcessorTextures must contain GrPrimitiveProcessor::numTextureSamplers() *
+     * numPrimitiveProcessorTextureSets entries.
+     */
+    void resolveAndGenerateMipMapsForProcessorTextures(
             const GrPrimitiveProcessor&, const GrPipeline&,
-            const GrTextureProxy* const primitiveProcessorTextures[]);
+            const GrTextureProxy* const primitiveProcessorTextures[],
+            int numPrimitiveProcessorTextureSets);
 
     // Flushes state from GrPipeline to GL. Returns false if the state couldn't be set.
     // willDrawPoints must be true if point primitives will be rendered after setting the GL state.
+    // If DynamicStateArrays is not null then dynamicStateArraysLength is the number of dynamic
+    // state entries in each array.
     bool flushGLState(const GrPrimitiveProcessor&, const GrPipeline&,
-                      const GrPipeline::FixedDynamicState*, bool willDrawPoints);
+                      const GrPipeline::FixedDynamicState*, const GrPipeline::DynamicStateArrays*,
+                      int dynamicStateArraysLength, bool willDrawPoints);
 
     void flushProgram(sk_sp<GrGLProgram>);
 
diff --git a/src/gpu/gl/GrGLPathRendering.cpp b/src/gpu/gl/GrGLPathRendering.cpp
index 8bd4c9c..1b3cfbd 100644
--- a/src/gpu/gl/GrGLPathRendering.cpp
+++ b/src/gpu/gl/GrGLPathRendering.cpp
@@ -116,7 +116,7 @@
                                    const GrPipeline::FixedDynamicState& fixedDynamicState,
                                    const GrStencilSettings& stencilPassSettings,
                                    const GrPath* path) {
-    if (!this->gpu()->flushGLState(primProc, pipeline, &fixedDynamicState, false)) {
+    if (!this->gpu()->flushGLState(primProc, pipeline, &fixedDynamicState, nullptr, 1, false)) {
         return;
     }
     const GrGLPath* glPath = static_cast<const GrGLPath*>(path);
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 9e90642..102dc4a 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -75,7 +75,6 @@
 void GrGLProgram::updateUniformsAndTextureBindings(const GrPrimitiveProcessor& primProc,
                                                    const GrPipeline& pipeline,
                                                    const GrTextureProxy* const primProcTextures[]) {
-    SkASSERT(primProcTextures || !primProc.numTextureSamplers());
     this->setRenderTargetState(primProc, pipeline.proxy());
 
     // we set the textures, and uniforms for installed processors in a generic way, but subclasses
@@ -84,13 +83,12 @@
     // We must bind to texture units in the same order in which we set the uniforms in
     // GrGLProgramDataManager. That is, we bind textures for processors in this order:
     // primProc, fragProcs, XP.
-    int nextTexSamplerIdx = 0;
     fPrimitiveProcessor->setData(fProgramDataManager, primProc,
                                  GrFragmentProcessor::CoordTransformIter(pipeline));
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        auto* tex = static_cast<GrGLTexture*>(primProcTextures[i]->peekTexture());
-        fGpu->bindTexture(nextTexSamplerIdx++, primProc.textureSampler(i).samplerState(), tex);
+    if (primProcTextures) {
+        this->updatePrimitiveProcessorTextureBindings(primProc, primProcTextures);
     }
+    int nextTexSamplerIdx = primProc.numTextureSamplers();
 
     this->setFragmentData(pipeline, &nextTexSamplerIdx);
 
@@ -106,6 +104,14 @@
     SkASSERT(nextTexSamplerIdx == fNumTextureSamplers);
 }
 
+void GrGLProgram::updatePrimitiveProcessorTextureBindings(const GrPrimitiveProcessor& primProc,
+                                                          const GrTextureProxy* const proxies[]) {
+    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+        auto* tex = static_cast<GrGLTexture*>(proxies[i]->peekTexture());
+        fGpu->bindTexture(i, primProc.textureSampler(i).samplerState(), tex);
+    }
+}
+
 void GrGLProgram::setFragmentData(const GrPipeline& pipeline, int* nextTexSamplerIdx) {
     GrFragmentProcessor::Iter iter(pipeline);
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index ca9253c..1a2ef20 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -112,13 +112,17 @@
 
     /**
      * This function uploads uniforms, calls each GrGLSL*Processor's setData. It binds all fragment
-     * processor textures. Primitive process textures are also bound here but are passed separately.
+     * processor textures. Primitive processor textures can be bound using this function or by
+     * calling updatePrimitiveProcessorTextureBindings.
      *
      * It is the caller's responsibility to ensure the program is bound before calling.
      */
     void updateUniformsAndTextureBindings(const GrPrimitiveProcessor&, const GrPipeline&,
                                           const GrTextureProxy* const primitiveProcessorTextures[]);
 
+    void updatePrimitiveProcessorTextureBindings(const GrPrimitiveProcessor&,
+                                                 const GrTextureProxy* const[]);
+
     int vertexStride() const { return fVertexStride; }
     int instanceStride() const { return fInstanceStride; }
 
diff --git a/src/gpu/ops/GrAAConvexPathRenderer.cpp b/src/gpu/ops/GrAAConvexPathRenderer.cpp
index e9a612f..837f405 100644
--- a/src/gpu/ops/GrAAConvexPathRenderer.cpp
+++ b/src/gpu/ops/GrAAConvexPathRenderer.cpp
@@ -938,7 +938,7 @@
                 firstIndex += draw.fIndexCnt;
                 firstVertex += draw.fVertexCnt;
             }
-            target->draw(quadProcessor, pipe.fPipeline, pipe.fFixedDynamicState, meshes,
+            target->draw(quadProcessor, pipe.fPipeline, pipe.fFixedDynamicState, nullptr, meshes,
                          draws.count());
         }
     }
diff --git a/src/gpu/ops/GrMeshDrawOp.cpp b/src/gpu/ops/GrMeshDrawOp.cpp
index 620ea47..54d2165 100644
--- a/src/gpu/ops/GrMeshDrawOp.cpp
+++ b/src/gpu/ops/GrMeshDrawOp.cpp
@@ -73,6 +73,29 @@
 
 //////////////////////////////////////////////////////////////////////////////
 
+GrPipeline::FixedDynamicState* GrMeshDrawOp::Target::allocFixedDynamicState(
+        const SkIRect& rect, int numPrimitiveProcessorTextures) {
+    auto result = this->pipelineArena()->make<GrPipeline::FixedDynamicState>(rect);
+    if (numPrimitiveProcessorTextures) {
+        result->fPrimitiveProcessorTextures =
+                this->allocPrimitiveProcessorTextureArray(numPrimitiveProcessorTextures);
+    }
+    return result;
+}
+
+GrPipeline::DynamicStateArrays* GrMeshDrawOp::Target::allocDynamicStateArrays(
+        int numMeshes, int numPrimitiveProcessorTextures, bool allocScissors) {
+    auto result = this->pipelineArena()->make<GrPipeline::DynamicStateArrays>();
+    if (allocScissors) {
+        result->fScissorRects = this->pipelineArena()->makeArray<SkIRect>(numMeshes);
+    }
+    if (numPrimitiveProcessorTextures) {
+        result->fPrimitiveProcessorTextures = this->allocPrimitiveProcessorTextureArray(
+                numPrimitiveProcessorTextures * numMeshes);
+    }
+    return result;
+}
+
 GrMeshDrawOp::Target::PipelineAndFixedDynamicState GrMeshDrawOp::Target::makePipeline(
         uint32_t pipelineFlags, GrProcessorSet&& processorSet, GrAppliedClip&& clip,
         int numPrimProcTextures) {
diff --git a/src/gpu/ops/GrMeshDrawOp.h b/src/gpu/ops/GrMeshDrawOp.h
index a6e7da5..808a1d4 100644
--- a/src/gpu/ops/GrMeshDrawOp.h
+++ b/src/gpu/ops/GrMeshDrawOp.h
@@ -83,15 +83,15 @@
     virtual void draw(sk_sp<const GrGeometryProcessor>,
                       const GrPipeline*,
                       const GrPipeline::FixedDynamicState*,
+                      const GrPipeline::DynamicStateArrays*,
                       const GrMesh[],
                       int meshCount) = 0;
-
     /** Helper for drawing a single GrMesh. */
     void draw(sk_sp<const GrGeometryProcessor> gp,
               const GrPipeline* pipeline,
               const GrPipeline::FixedDynamicState* fixedDynamicState,
               const GrMesh* mesh) {
-        this->draw(std::move(gp), pipeline, fixedDynamicState, mesh, 1);
+        this->draw(std::move(gp), pipeline, fixedDynamicState, nullptr, mesh, 1);
     }
 
     /**
@@ -152,14 +152,11 @@
     GrMesh* allocMeshes(int n) { return this->pipelineArena()->makeArray<GrMesh>(n); }
 
     GrPipeline::FixedDynamicState* allocFixedDynamicState(const SkIRect& rect,
-                                                          int numPrimitiveProcessorTextures = 0) {
-        auto result = this->pipelineArena()->make<GrPipeline::FixedDynamicState>(rect);
-        if (numPrimitiveProcessorTextures) {
-            result->fPrimitiveProcessorTextures =
-                    this->allocPrimitiveProcessorTextureArray(numPrimitiveProcessorTextures);
-        }
-        return result;
-    }
+                                                          int numPrimitiveProcessorTextures = 0);
+
+    GrPipeline::DynamicStateArrays* allocDynamicStateArrays(int numMeshes,
+                                                            int numPrimitiveProcessorTextures,
+                                                            bool allocScissors);
 
     GrTextureProxy** allocPrimitiveProcessorTextureArray(int n) {
         SkASSERT(n > 0);
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 2ba3639..fa86c2f 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -611,7 +611,7 @@
     }
 
     template <typename Pos, Domain D, GrAA AA>
-    void tess(void* v, const GrGeometryProcessor* gp) {
+    void tess(void* v, const GrGeometryProcessor* gp) const {
         using Vertex = TextureGeometryProcessor::Vertex<Pos, D, AA>;
         SkASSERT(gp->debugOnly_vertexStride() == sizeof(Vertex));
         auto vertices = static_cast<Vertex*>(v);
@@ -628,16 +628,25 @@
     }
 
     void onPrepareDraws(Target* target) override {
-        if (!fProxy->instantiate(target->resourceProvider())) {
-            return;
+        bool hasPerspective = false;
+        Domain domain = Domain::kNo;
+        int numOps = 0;
+        for (const auto& op : ChainRange<TextureOp>(this)) {
+            ++numOps;
+            hasPerspective |= op.fPerspective;
+            if (op.fDomain) {
+                domain = Domain::kYes;
+            }
+            if (!op.fProxy->instantiate(target->resourceProvider())) {
+                return;
+            }
         }
 
-        Domain domain = fDomain ? Domain::kYes : Domain::kNo;
         bool coverageAA = GrAAType::kCoverage == this->aaType();
         sk_sp<GrGeometryProcessor> gp = TextureGeometryProcessor::Make(
                 fProxy->textureType(), fProxy->config(), fFilter,
                 std::move(fTextureColorSpaceXform), std::move(fPaintColorSpaceXform), coverageAA,
-                fPerspective, domain, *target->caps().shaderCaps());
+                hasPerspective, domain, *target->caps().shaderCaps());
         GrPipeline::InitArgs args;
         args.fProxy = target->proxy();
         args.fCaps = &target->caps();
@@ -648,8 +657,17 @@
         }
 
         auto clip = target->detachAppliedClip();
-        auto* fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 1);
-        fixedDynamicState->fPrimitiveProcessorTextures[0] = fProxy;
+        // We'll use a dynamic state array for the GP textures when there are multiple ops.
+        // Otherwise, we use fixed dynamic state to specify the single op's proxy.
+        GrPipeline::DynamicStateArrays* dynamicStateArrays = nullptr;
+        GrPipeline::FixedDynamicState* fixedDynamicState;
+        if (numOps > 1) {
+            dynamicStateArrays = target->allocDynamicStateArrays(numOps, 1, false);
+            fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 0);
+        } else {
+            fixedDynamicState = target->allocFixedDynamicState(clip.scissorState().rect(), 1);
+            fixedDynamicState->fPrimitiveProcessorTextures[0] = fProxy;
+        }
         const auto* pipeline =
                 target->allocPipeline(args, GrProcessorSet::MakeEmptySet(), std::move(clip));
         using TessFn = decltype(&TextureOp::tess<SkPoint, Domain::kNo, GrAA::kNo>);
@@ -673,42 +691,50 @@
         };
 #undef TESS_FN_AND_VERTEX_SIZE
         int tessFnIdx = 0;
-        tessFnIdx |= coverageAA   ? 0x1 : 0x0;
-        tessFnIdx |= fDomain      ? 0x2 : 0x0;
-        tessFnIdx |= fPerspective ? 0x4 : 0x0;
+        tessFnIdx |= coverageAA               ? 0x1 : 0x0;
+        tessFnIdx |= (domain == Domain::kYes) ? 0x2 : 0x0;
+        tessFnIdx |= hasPerspective           ? 0x4 : 0x0;
 
         SkASSERT(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize == gp->debugOnly_vertexStride());
 
-        int vstart;
-        const GrBuffer* vbuffer;
-        void* vdata = target->makeVertexSpace(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize,
-                                              4 * fDraws.count(), &vbuffer, &vstart);
-        if (!vdata) {
-            SkDebugf("Could not allocate vertices\n");
-            return;
-        }
-
-        (this->*(kTessFnsAndVertexSizes[tessFnIdx].fTessFn))(vdata, gp.get());
-
-        GrPrimitiveType primitiveType =
-                fDraws.count() > 1 ? GrPrimitiveType::kTriangles : GrPrimitiveType::kTriangleStrip;
-        GrMesh* mesh = target->allocMesh(primitiveType);
-        if (fDraws.count() > 1) {
-            sk_sp<const GrBuffer> ibuffer = target->resourceProvider()->refQuadIndexBuffer();
-            if (!ibuffer) {
-                SkDebugf("Could not allocate quad indices\n");
+        GrMesh* meshes = target->allocMeshes(numOps);
+        int i = 0;
+        for (const auto& op : ChainRange<TextureOp>(this)) {
+            int vstart;
+            const GrBuffer* vbuffer;
+            void* vdata = target->makeVertexSpace(kTessFnsAndVertexSizes[tessFnIdx].fVertexSize,
+                                                  4 * op.fDraws.count(), &vbuffer, &vstart);
+            if (!vdata) {
+                SkDebugf("Could not allocate vertices\n");
                 return;
             }
-            mesh->setIndexedPatterned(ibuffer.get(), 6, 4, fDraws.count(),
-                                      GrResourceProvider::QuadCountOfQuadBuffer());
-        } else {
-            mesh->setNonIndexedNonInstanced(4);
+
+            (op.*(kTessFnsAndVertexSizes[tessFnIdx].fTessFn))(vdata, gp.get());
+
+            if (op.fDraws.count() > 1) {
+                meshes[i].setPrimitiveType(GrPrimitiveType::kTriangles);
+                sk_sp<const GrBuffer> ibuffer = target->resourceProvider()->refQuadIndexBuffer();
+                if (!ibuffer) {
+                    SkDebugf("Could not allocate quad indices\n");
+                    return;
+                }
+                meshes[i].setIndexedPatterned(ibuffer.get(), 6, 4, op.fDraws.count(),
+                                              GrResourceProvider::QuadCountOfQuadBuffer());
+            } else {
+                meshes[i].setPrimitiveType(GrPrimitiveType::kTriangleStrip);
+                meshes[i].setNonIndexedNonInstanced(4);
+            }
+            meshes[i].setVertexData(vbuffer, vstart);
+            if (dynamicStateArrays) {
+                dynamicStateArrays->fPrimitiveProcessorTextures[i] = op.fProxy;
+            }
+            ++i;
         }
-        mesh->setVertexData(vbuffer, vstart);
-        target->draw(std::move(gp), pipeline, fixedDynamicState, mesh);
+        target->draw(std::move(gp), pipeline, fixedDynamicState, dynamicStateArrays, meshes,
+                     numOps);
     }
 
-    CombineResult onCombineIfPossible(GrOp* t, const GrCaps&) override {
+    CombineResult onCombineIfPossible(GrOp* t, const GrCaps& caps) override {
         const auto* that = t->cast<TextureOp>();
         if (!GrColorSpaceXform::Equals(fTextureColorSpaceXform.get(),
                                        that->fTextureColorSpaceXform.get())) {
@@ -721,7 +747,17 @@
         if (this->aaType() != that->aaType()) {
             return CombineResult::kCannotCombine;
         }
-        if (fProxy->uniqueID() != that->fProxy->uniqueID() || fFilter != that->fFilter) {
+        if (fFilter != that->fFilter) {
+            return CombineResult::kCannotCombine;
+        }
+        if (fProxy->uniqueID() != that->fProxy->uniqueID() || that->isChained()) {
+            // We can't merge across different proxies (and we're disallowed from merging when
+            // 'that' is chained). Check if we can be chained with 'that'.
+            if (fProxy->config() == that->fProxy->config() &&
+                fProxy->textureType() == that->fProxy->textureType() &&
+                caps.dynamicStateArrayGeometryProcessorTextureSupport()) {
+                return CombineResult::kMayChain;
+            }
             return CombineResult::kCannotCombine;
         }
         fDraws.push_back_n(that->fDraws.count(), that->fDraws.begin());
diff --git a/src/gpu/vk/GrVkPipelineState.cpp b/src/gpu/vk/GrVkPipelineState.cpp
index cd59eab..6c2b00b 100644
--- a/src/gpu/vk/GrVkPipelineState.cpp
+++ b/src/gpu/vk/GrVkPipelineState.cpp
@@ -181,10 +181,12 @@
 
     fGeometryProcessor->setData(fDataManager, primProc,
                                 GrFragmentProcessor::CoordTransformIter(pipeline));
-    for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
-        const auto& sampler = primProc.textureSampler(i);
-        auto texture = static_cast<GrVkTexture*>(primProcTextures[i]->peekTexture());
-        samplerBindings[currTextureBinding++] = {sampler.samplerState(), texture};
+    if (primProcTextures) {
+        for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
+            const auto& sampler = primProc.textureSampler(i);
+            auto texture = static_cast<GrVkTexture*>(primProcTextures[i]->peekTexture());
+            samplerBindings[currTextureBinding++] = {sampler.samplerState(), texture};
+        }
     }
 
     GrFragmentProcessor::Iter iter(pipeline);