Roll external/skia 19f466d399f1..838007f1ffd6 (7 commits)

https://skia.googlesource.com/skia.git/+log/19f466d399f1..838007f1ffd6

If this roll has caused a breakage, revert this CL and stop the roller
using the controls here:
https://skia-autoroll.corp.goog/r/android-master-autoroll
Please CC stani@google.com on the revert to ensure that a human
is aware of the problem.

To report a problem with the AutoRoller itself, please file a bug:
https://bugs.chromium.org/p/skia/issues/entry?template=Autoroller+Bug

Documentation for the AutoRoller is here:
https://skia.googlesource.com/buildbot/+/master/autoroll/README.md

Test: Presubmit checks will test this change.
Exempt-From-Owner-Approval: The autoroll bot does not require owner approval.
Change-Id: I0b9372a1cb040333c1a75e91fba535d270fc545a
diff --git a/Android.bp b/Android.bp
index 97e1776..ab57923 100644
--- a/Android.bp
+++ b/Android.bp
@@ -685,6 +685,7 @@
           "src/gpu/effects/generated/GrComposeLerpRedEffect.cpp",
           "src/gpu/effects/generated/GrConfigConversionEffect.cpp",
           "src/gpu/effects/generated/GrConstColorProcessor.cpp",
+          "src/gpu/effects/generated/GrDeviceSpaceEffect.cpp",
           "src/gpu/effects/generated/GrEllipseEffect.cpp",
           "src/gpu/effects/generated/GrHSLToRGBFilterEffect.cpp",
           "src/gpu/effects/generated/GrLumaColorFilterEffect.cpp",
diff --git a/gm/windowrectangles.cpp b/gm/windowrectangles.cpp
index b14d137..1244b90 100644
--- a/gm/windowrectangles.cpp
+++ b/gm/windowrectangles.cpp
@@ -33,6 +33,7 @@
 #include "src/gpu/GrFixedClip.h"
 #include "src/gpu/GrFragmentProcessor.h"
 #include "src/gpu/GrPaint.h"
+#include "src/gpu/GrRecordingContextPriv.h"
 #include "src/gpu/GrReducedClip.h"
 #include "src/gpu/GrRenderTargetContext.h"
 #include "src/gpu/GrRenderTargetContextPriv.h"
@@ -40,6 +41,7 @@
 #include "src/gpu/GrTextureProxy.h"
 #include "src/gpu/GrUserStencilSettings.h"
 #include "src/gpu/effects/GrTextureDomain.h"
+#include "src/gpu/effects/generated/GrDeviceSpaceEffect.h"
 #include "tools/ToolUtils.h"
 
 #include <utility>
@@ -177,10 +179,17 @@
     AlphaOnlyClip(GrSurfaceProxyView mask, int x, int y) : fMask(std::move(mask)), fX(x), fY(y) {}
 
 private:
-    bool apply(GrRecordingContext*, GrRenderTargetContext*, bool, bool, GrAppliedClip* out,
+    bool apply(GrRecordingContext* ctx, GrRenderTargetContext*, bool, bool, GrAppliedClip* out,
                SkRect* bounds) const override {
-        out->addCoverageFP(GrDeviceSpaceTextureDecalFragmentProcessor::Make(
-                fMask, SkIRect::MakeSize(fMask.proxy()->dimensions()), {fX, fY}));
+        GrSamplerState samplerState(GrSamplerState::WrapMode::kClampToBorder,
+                                    GrSamplerState::Filter::kNearest);
+        auto m = SkMatrix::MakeTrans(-fX, -fY);
+        auto subset = SkRect::Make(fMask.dimensions());
+        auto domain = bounds->makeOffset(-fX, -fY).makeInset(0.5, 0.5);
+        auto fp = GrTextureEffect::MakeSubset(fMask, kPremul_SkAlphaType, m, samplerState, subset,
+                                              domain, *ctx->priv().caps());
+        fp = GrDeviceSpaceEffect::Make(std::move(fp));
+        out->addCoverageFP(std::move(fp));
         return true;
     }
     GrSurfaceProxyView fMask;
diff --git a/gn/gpu.gni b/gn/gpu.gni
index 28f06d3..7eb1198 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -374,6 +374,8 @@
   "$_src/gpu/effects/generated/GrConstColorProcessor.h",
   "$_src/gpu/effects/generated/GrEllipseEffect.cpp",
   "$_src/gpu/effects/generated/GrEllipseEffect.h",
+  "$_src/gpu/effects/generated/GrDeviceSpaceEffect.cpp",
+  "$_src/gpu/effects/generated/GrDeviceSpaceEffect.h",
   "$_src/gpu/effects/generated/GrHSLToRGBFilterEffect.cpp",
   "$_src/gpu/effects/generated/GrHSLToRGBFilterEffect.h",
   "$_src/gpu/effects/generated/GrLumaColorFilterEffect.cpp",
diff --git a/gn/sksl.gni b/gn/sksl.gni
index a692166..7ad737a 100644
--- a/gn/sksl.gni
+++ b/gn/sksl.gni
@@ -47,6 +47,7 @@
   "$_src/gpu/effects/GrConfigConversionEffect.fp",
   "$_src/gpu/effects/GrConstColorProcessor.fp",
   "$_src/gpu/effects/GrColorMatrixFragmentProcessor.fp",
+  "$_src/gpu/effects/GrDeviceSpaceEffect.fp",
   "$_src/gpu/effects/GrEllipseEffect.fp",
   "$_src/gpu/effects/GrHSLToRGBFilterEffect.fp",
   "$_src/gpu/effects/GrLumaColorFilterEffect.fp",
diff --git a/include/gpu/d3d/GrD3D12.h b/include/gpu/d3d/GrD3D12.h
index 9b806a0..e318ca6 100644
--- a/include/gpu/d3d/GrD3D12.h
+++ b/include/gpu/d3d/GrD3D12.h
@@ -9,6 +9,7 @@
 #define GrD3D12_DEFINED
 
 #include <d3d12.h>
+#include <dxgi1_4.h>
 #include <wrl/client.h>  // for ComPtr
 
 // Abbreviate and alias ComPtr
diff --git a/include/gpu/d3d/GrD3DBackendContext.h b/include/gpu/d3d/GrD3DBackendContext.h
index a12da6b..abead01 100644
--- a/include/gpu/d3d/GrD3DBackendContext.h
+++ b/include/gpu/d3d/GrD3DBackendContext.h
@@ -15,6 +15,7 @@
 // The BackendContext contains all of the base D3D objects needed by the GrD3DGpu. The assumption
 // is that the client will set these up and pass them to the GrD3DGpu constructor.
 struct SK_API GrD3DBackendContext {
+    gr_cp<IDXGIAdapter1> fAdapter;
     gr_cp<ID3D12Device> fDevice;
     gr_cp<ID3D12CommandQueue> fQueue;
     GrProtected fProtectedContext = GrProtected::kNo;
diff --git a/infra/config/recipes.cfg b/infra/config/recipes.cfg
index 677fed0..23d43ad 100644
--- a/infra/config/recipes.cfg
+++ b/infra/config/recipes.cfg
@@ -14,7 +14,7 @@
   "deps": {
     "depot_tools": {
       "branch": "master",
-      "revision": "227d5108573318ac6510f17d80c167d94ceae3e1",
+      "revision": "b847c567e717cd861b8f1d778ef1a402af03a99a",
       "url": "https://chromium.googlesource.com/chromium/tools/depot_tools.git"
     },
     "recipe_engine": {
diff --git a/src/gpu/GrClipStackClip.cpp b/src/gpu/GrClipStackClip.cpp
index 2c5a941..66f2de8 100644
--- a/src/gpu/GrClipStackClip.cpp
+++ b/src/gpu/GrClipStackClip.cpp
@@ -5,12 +5,13 @@
  * found in the LICENSE file.
  */
 
+#include "src/gpu/GrClipStackClip.h"
+
 #include "include/private/SkTo.h"
 #include "src/core/SkClipOpPriv.h"
 #include "src/core/SkTaskGroup.h"
 #include "src/core/SkTraceEvent.h"
 #include "src/gpu/GrAppliedClip.h"
-#include "src/gpu/GrClipStackClip.h"
 #include "src/gpu/GrContextPriv.h"
 #include "src/gpu/GrDeferredProxyUploader.h"
 #include "src/gpu/GrDrawingManager.h"
@@ -26,6 +27,7 @@
 #include "src/gpu/effects/GrConvexPolyEffect.h"
 #include "src/gpu/effects/GrRRectEffect.h"
 #include "src/gpu/effects/GrTextureDomain.h"
+#include "src/gpu/effects/generated/GrDeviceSpaceEffect.h"
 #include "src/gpu/geometry/GrShape.h"
 
 typedef SkClipStack::Element Element;
@@ -78,11 +80,18 @@
 ////////////////////////////////////////////////////////////////////////////////
 // set up the draw state to enable the aa clipping mask.
 static std::unique_ptr<GrFragmentProcessor> create_fp_for_mask(GrSurfaceProxyView mask,
-                                                               const SkIRect& devBound) {
-    SkASSERT(mask.asTextureProxy());
-    SkIRect domainTexels = SkIRect::MakeWH(devBound.width(), devBound.height());
-    return GrDeviceSpaceTextureDecalFragmentProcessor::Make(std::move(mask), domainTexels,
-                                                            {devBound.fLeft, devBound.fTop});
+                                                               const SkIRect& devBound,
+                                                               const GrCaps& caps) {
+    GrSamplerState samplerState(GrSamplerState::WrapMode::kClampToBorder,
+                                GrSamplerState::Filter::kNearest);
+    auto m = SkMatrix::MakeTrans(-devBound.fLeft, -devBound.fTop);
+    auto subset = SkRect::Make(devBound.size());
+    // We scissor to devBounds. The mask's texel centers are aligned to device space
+    // pixel centers. Hence this domain of texture coordinates.
+    auto domain = subset.makeInset(0.5, 0.5);
+    auto fp = GrTextureEffect::MakeSubset(std::move(mask), kPremul_SkAlphaType, m, samplerState,
+                                          subset, domain, caps);
+    return GrDeviceSpaceEffect::Make(std::move(fp));
 }
 
 // Does the path in 'element' require SW rendering? If so, return true (and,
@@ -283,7 +292,8 @@
         if (result) {
             // The mask's top left coord should be pinned to the rounded-out top left corner of
             // the clip's device space bounds.
-            out->addCoverageFP(create_fp_for_mask(std::move(result), reducedClip.scissor()));
+            out->addCoverageFP(create_fp_for_mask(std::move(result), reducedClip.scissor(),
+                                                  *context->priv().caps()));
             return true;
         }
 
diff --git a/src/gpu/GrMesh.h b/src/gpu/GrMesh.h
index 979ec0a..19e5f22 100644
--- a/src/gpu/GrMesh.h
+++ b/src/gpu/GrMesh.h
@@ -10,6 +10,7 @@
 
 #include "src/gpu/GrBuffer.h"
 #include "src/gpu/GrGpuBuffer.h"
+#include "src/gpu/GrOpsRenderPass.h"
 
 class GrPrimitiveProcessor;
 
@@ -57,23 +58,7 @@
 
     void setVertexData(sk_sp<const GrBuffer> vertexBuffer, int baseVertex = 0);
 
-    class SendToGpuImpl {
-    public:
-        virtual void sendArrayMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount,
-                                        int baseVertex) = 0;
-        virtual void sendIndexedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount,
-                                          int baseIndex, uint16_t minIndexValue,
-                                          uint16_t maxIndexValue, int baseVertex) = 0;
-        virtual void sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount,
-                                            int baseVertex, int instanceCount,
-                                            int baseInstance) = 0;
-        virtual void sendIndexedInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount,
-                                                   int baseIndex, int baseVertex, int instanceCount,
-                                                   int baseInstance) = 0;
-        virtual ~SendToGpuImpl() {}
-    };
-
-    void sendToGpu(GrPrimitiveType, SendToGpuImpl*) const;
+    void draw(GrOpsRenderPass*) const;
 
 private:
     enum class Flags : uint8_t {
@@ -212,32 +197,33 @@
     fBaseVertex = baseVertex;
 }
 
-inline void GrMesh::sendToGpu(GrPrimitiveType primitiveType, SendToGpuImpl* impl) const {
+inline void GrMesh::draw(GrOpsRenderPass* opsRenderPass) const {
     if (this->isInstanced()) {
         if (!this->isIndexed()) {
-            impl->sendInstancedMeshToGpu(primitiveType, *this, fInstanceNonIndexData.fVertexCount,
-                                         fBaseVertex, fInstanceData.fInstanceCount,
-                                         fInstanceData.fBaseInstance);
+            opsRenderPass->drawInstanced(fInstanceBuffer.get(), fInstanceData.fInstanceCount,
+                                         fInstanceData.fBaseInstance, fVertexBuffer.get(),
+                                         fInstanceNonIndexData.fVertexCount, fBaseVertex);
         } else {
-            impl->sendIndexedInstancedMeshToGpu(
-                    primitiveType, *this, fInstanceIndexData.fIndexCount, 0, fBaseVertex,
-                    fInstanceData.fInstanceCount, fInstanceData.fBaseInstance);
+            opsRenderPass->drawIndexedInstanced(
+                    fIndexBuffer.get(), fInstanceIndexData.fIndexCount, 0, this->primitiveRestart(),
+                    fInstanceBuffer.get(), fInstanceData.fInstanceCount,
+                    fInstanceData.fBaseInstance, fVertexBuffer.get(), fBaseVertex);
         }
         return;
     }
 
     if (!this->isIndexed()) {
         SkASSERT(fNonIndexNonInstanceData.fVertexCount > 0);
-        impl->sendArrayMeshToGpu(primitiveType, *this, fNonIndexNonInstanceData.fVertexCount,
-                                 fBaseVertex);
+        opsRenderPass->draw(fVertexBuffer.get(), fNonIndexNonInstanceData.fVertexCount,
+                            fBaseVertex);
         return;
     }
 
     if (0 == fIndexData.fPatternRepeatCount) {
-        impl->sendIndexedMeshToGpu(primitiveType, *this, fIndexData.fIndexCount,
-                                   fNonPatternIndexData.fBaseIndex,
-                                   fNonPatternIndexData.fMinIndexValue,
-                                   fNonPatternIndexData.fMaxIndexValue, fBaseVertex);
+        opsRenderPass->drawIndexed(
+                fIndexBuffer.get(), fIndexData.fIndexCount, fNonPatternIndexData.fBaseIndex,
+                this->primitiveRestart(), fNonPatternIndexData.fMinIndexValue,
+                fNonPatternIndexData.fMaxIndexValue, fVertexBuffer.get(), fBaseVertex);
         return;
     }
 
@@ -251,8 +237,8 @@
         int minIndexValue = 0;
         int maxIndexValue = fPatternData.fVertexCount * repeatCount - 1;
         SkASSERT(!(fFlags & Flags::kUsePrimitiveRestart));
-        impl->sendIndexedMeshToGpu(primitiveType, *this, indexCount, 0, minIndexValue,
-                                   maxIndexValue,
+        opsRenderPass->drawIndexed(fIndexBuffer.get(), indexCount, 0, this->primitiveRestart(),
+                                   minIndexValue, maxIndexValue, fVertexBuffer.get(),
                                    fBaseVertex + fPatternData.fVertexCount * baseRepetition);
         baseRepetition += repeatCount;
     } while (baseRepetition < fIndexData.fPatternRepeatCount);
diff --git a/src/gpu/GrOpsRenderPass.cpp b/src/gpu/GrOpsRenderPass.cpp
index ed82228..0eb38eb 100644
--- a/src/gpu/GrOpsRenderPass.cpp
+++ b/src/gpu/GrOpsRenderPass.cpp
@@ -145,26 +145,71 @@
             this->bindTextures(programInfo.primProc(), programInfo.pipeline(),
                                programInfo.dynamicPrimProcTextures(i));
         }
-        this->drawMesh(programInfo.primitiveType(), meshes[i]);
+        meshes[i].draw(this);
     }
 }
 
-void GrOpsRenderPass::drawMesh(GrPrimitiveType primitiveType, const GrMesh& mesh) {
+bool GrOpsRenderPass::prepareToDraw() {
     if (DrawPipelineStatus::kOk != fDrawPipelineStatus) {
         SkASSERT(DrawPipelineStatus::kNotConfigured != fDrawPipelineStatus);
         this->gpu()->stats()->incNumFailedDraws();
-        return;
+        return false;
     }
-
     SkASSERT(DynamicStateStatus::kUninitialized != fScissorStatus);
     SkASSERT(DynamicStateStatus::kUninitialized != fTextureBindingStatus);
-    SkASSERT(SkToBool(mesh.vertexBuffer()) == fHasVertexAttributes);
-    SkASSERT(SkToBool(mesh.instanceBuffer()) == fHasInstanceAttributes);
-    SkASSERT(GrPrimitiveRestart::kNo == mesh.primitiveRestart() ||
-             this->gpu()->caps()->usePrimitiveRestart());
 
     if (kNone_GrXferBarrierType != fXferBarrierType) {
         this->gpu()->xferBarrier(fRenderTarget, fXferBarrierType);
     }
-    this->onDrawMesh(primitiveType, mesh);
+    return true;
+}
+
+void GrOpsRenderPass::draw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) {
+    if (!this->prepareToDraw()) {
+        return;
+    }
+    SkASSERT(SkToBool(vertexBuffer) == fHasVertexAttributes);
+    this->onDraw(vertexBuffer, vertexCount, baseVertex);
+}
+
+void GrOpsRenderPass::drawIndexed(const GrBuffer* indexBuffer, int indexCount,
+                                  int baseIndex, GrPrimitiveRestart primitiveRestart,
+                                  uint16_t minIndexValue, uint16_t maxIndexValue,
+                                  const GrBuffer* vertexBuffer, int baseVertex) {
+    if (!this->prepareToDraw()) {
+        return;
+    }
+    SkASSERT(GrPrimitiveRestart::kNo == primitiveRestart ||
+             this->gpu()->caps()->usePrimitiveRestart());
+    SkASSERT(SkToBool(vertexBuffer) == fHasVertexAttributes);
+    this->onDrawIndexed(indexBuffer, indexCount, baseIndex, primitiveRestart, minIndexValue,
+                        maxIndexValue, vertexBuffer, baseVertex);
+}
+
+void GrOpsRenderPass::drawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int
+                                    baseInstance, const GrBuffer* vertexBuffer, int vertexCount,
+                                    int baseVertex) {
+    if (!this->prepareToDraw()) {
+        return;
+    }
+    SkASSERT(SkToBool(vertexBuffer) == fHasVertexAttributes);
+    SkASSERT(SkToBool(instanceBuffer) == fHasInstanceAttributes);
+    this->onDrawInstanced(instanceBuffer, instanceCount, baseInstance, vertexBuffer, vertexCount,
+                          baseVertex);
+}
+
+void GrOpsRenderPass::drawIndexedInstanced(
+        const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+        GrPrimitiveRestart primitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+        int baseInstance, const GrBuffer* vertexBuffer, int baseVertex) {
+    if (!this->prepareToDraw()) {
+        return;
+    }
+    SkASSERT(GrPrimitiveRestart::kNo == primitiveRestart ||
+             this->gpu()->caps()->usePrimitiveRestart());
+    SkASSERT(SkToBool(vertexBuffer) == fHasVertexAttributes);
+    SkASSERT(SkToBool(instanceBuffer) == fHasInstanceAttributes);
+    this->onDrawIndexedInstanced(indexBuffer, indexCount, baseIndex, primitiveRestart,
+                                 instanceBuffer, instanceCount, baseInstance, vertexBuffer,
+                                 baseVertex);
 }
diff --git a/src/gpu/GrOpsRenderPass.h b/src/gpu/GrOpsRenderPass.h
index 0bea996..a96c6d5 100644
--- a/src/gpu/GrOpsRenderPass.h
+++ b/src/gpu/GrOpsRenderPass.h
@@ -76,9 +76,18 @@
     // setScissor() and bindTextures() on the client's behalf.
     void drawMeshes(const GrProgramInfo&, const GrMesh[], int meshCount);
 
-    // Draws the given mesh using the current pipeline state. The client must call bindPipeline(),
-    // followed setScissor() and/or bindTextures() if necessary, before using this method.
-    void drawMesh(GrPrimitiveType, const GrMesh&);
+    // These methods issue draws using the current pipeline state. The client must call
+    // bindPipeline(), followed by setScissor() and/or bindTextures() if applicable, before using
+    // these methods.
+    void draw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex);
+    void drawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex, GrPrimitiveRestart,
+                     uint16_t minIndexValue, uint16_t maxIndexValue, const GrBuffer* vertexBuffer,
+                     int baseVertex);
+    void drawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance,
+                       const GrBuffer* vertexBuffer, int vertexCount, int baseVertex);
+    void drawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                              GrPrimitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+                              int baseInstance, const GrBuffer* vertexBuffer, int baseVertex);
 
     // Performs an upload of vertex data in the middle of a set of a set of draws
     virtual void inlineUpload(GrOpFlushState*, GrDeferredTextureUploadFn&) = 0;
@@ -116,12 +125,24 @@
 private:
     virtual GrGpu* gpu() = 0;
 
+    bool prepareToDraw();
+
     // overridden by backend-specific derived class to perform the rendering command.
     virtual bool onBindPipeline(const GrProgramInfo&, const SkRect& drawBounds) = 0;
     virtual void onSetScissorRect(const SkIRect&) = 0;
     virtual bool onBindTextures(const GrPrimitiveProcessor&, const GrPipeline&,
-                                const GrSurfaceProxy* const primProcTextures[] = nullptr) = 0;
-    virtual void onDrawMesh(GrPrimitiveType, const GrMesh&) = 0;
+                                const GrSurfaceProxy* const primProcTextures[]) = 0;
+    virtual void onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) = 0;
+    virtual void onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                               GrPrimitiveRestart, uint16_t minIndexValue, uint16_t maxIndexValue,
+                               const GrBuffer* vertexBuffer, int baseVertex) = 0;
+    virtual void onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount,
+                                 int baseInstance, const GrBuffer* vertexBuffer, int vertexCount,
+                                 int baseVertex) = 0;
+    virtual void onDrawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                        GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                                        int instanceCount, int baseInstance,
+                                        const GrBuffer* vertexBuffer, int baseVertex) = 0;
     virtual void onClear(const GrFixedClip&, const SkPMColor4f&) = 0;
     virtual void onClearStencilClip(const GrFixedClip&, bool insideStencilMask) = 0;
     virtual void onExecuteDrawable(std::unique_ptr<SkDrawable::GpuDrawHandler>) {}
diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h
index 3274a14..4069344 100644
--- a/src/gpu/GrProcessor.h
+++ b/src/gpu/GrProcessor.h
@@ -101,7 +101,7 @@
         kGrConicEffect_ClassID,
         kGrConstColorProcessor_ClassID,
         kGrConvexPolyEffect_ClassID,
-        kGrDeviceSpaceTextureDecalFragmentProcessor_ClassID,
+        kGrDeviceSpaceEffect_ClassID,
         kGrDiffuseLightingEffect_ClassID,
         kGrDisplacementMapEffect_ClassID,
         kGrDistanceFieldA8TextGeoProc_ClassID,
diff --git a/src/gpu/GrShaderCaps.h b/src/gpu/GrShaderCaps.h
index 4785275..2563d66 100644
--- a/src/gpu/GrShaderCaps.h
+++ b/src/gpu/GrShaderCaps.h
@@ -321,6 +321,7 @@
 
     friend class GrCaps;  // For initialization.
     friend class GrDawnCaps;
+    friend class GrD3DCaps;
     friend class GrGLCaps;
     friend class GrMockCaps;
     friend class GrMtlCaps;
diff --git a/src/gpu/d3d/GrD3DCaps.cpp b/src/gpu/d3d/GrD3DCaps.cpp
index 2db3edb..43592fb 100644
--- a/src/gpu/d3d/GrD3DCaps.cpp
+++ b/src/gpu/d3d/GrD3DCaps.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "include/gpu/GrBackendSurface.h"
+#include "include/gpu/d3d/GrD3D12.h"
 #include "include/gpu/d3d/GrD3DBackendContext.h"
 
 #include "src/gpu/GrProgramDesc.h"
@@ -13,13 +14,14 @@
 #include "src/gpu/d3d/GrD3DCaps.h"
 #include "src/gpu/d3d/GrD3DGpu.h"
 
-GrD3DCaps::GrD3DCaps(const GrContextOptions& contextOptions, GrProtected isProtected)
+GrD3DCaps::GrD3DCaps(const GrContextOptions& contextOptions, IDXGIAdapter1* adapter,
+                     ID3D12Device* device)
         : INHERITED(contextOptions) {
     /**************************************************************************
      * GrCaps fields
      **************************************************************************/
     fMipMapSupport = true;   // always available in Direct3D
-    fNPOTTextureTileSupport = false;  // TODO: figure this out
+    fNPOTTextureTileSupport = true;  // available in feature level 10_0 and up
     fReuseScratchTextures = true; //TODO: figure this out
     fGpuTracingSupport = false; //TODO: figure this out
     fOversizedStencilSupport = false; //TODO: figure this out
@@ -35,33 +37,170 @@
     fReadPixelsRowBytesSupport = true;
     fWritePixelsRowBytesSupport = true;
 
-    // TODO: figure this out
+    // TODO: implement these
     fTransferFromBufferToTextureSupport = false;
     fTransferFromSurfaceToBufferSupport = false;
 
-    // TODO: figure this out
-    fMaxRenderTargetSize = 4096;
-    fMaxTextureSize = 4096;
+    fMaxRenderTargetSize = 16384;  // minimum required by feature level 11_0
+    fMaxTextureSize = 16384;       // minimum required by feature level 11_0
 
-    fDynamicStateArrayGeometryProcessorTextureSupport = false; // TODO: figure this out
+    // TODO: implement
+    fDynamicStateArrayGeometryProcessorTextureSupport = false;
 
     fShaderCaps.reset(new GrShaderCaps(contextOptions));
 
-    this->init(contextOptions);
+    this->init(contextOptions, adapter, device);
 }
 
 bool GrD3DCaps::onCanCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
-                                const SkIRect& srcRect, const SkIPoint& dstPoint) const {
+                                 const SkIRect& srcRect, const SkIPoint& dstPoint) const {
     return false;
 }
 
-void GrD3DCaps::init(const GrContextOptions& contextOptions) {
-    // TODO
+void GrD3DCaps::init(const GrContextOptions& contextOptions, IDXGIAdapter1* adapter,
+                     ID3D12Device* device) {
+    D3D_FEATURE_LEVEL featureLevels[] = {
+        D3D_FEATURE_LEVEL_11_0,
+        D3D_FEATURE_LEVEL_11_1,
+        D3D_FEATURE_LEVEL_12_0,
+        D3D_FEATURE_LEVEL_12_1,
+    };
+    D3D12_FEATURE_DATA_FEATURE_LEVELS flDesc = {};
+    flDesc.NumFeatureLevels = _countof(featureLevels);
+    flDesc.pFeatureLevelsRequested = featureLevels;
+    HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &flDesc, sizeof(flDesc));
+    SkASSERT(SUCCEEDED(hr));
+    // The device must support at least the lowest feature level requested above.
+    SkASSERT(flDesc.MaxSupportedFeatureLevel >= D3D_FEATURE_LEVEL_11_0);
+
+    DXGI_ADAPTER_DESC adapterDesc;
+    hr = adapter->GetDesc(&adapterDesc);
+    SkASSERT(SUCCEEDED(hr));
+
+    D3D12_FEATURE_DATA_D3D12_OPTIONS optionsDesc;
+    hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &optionsDesc,
+                                     sizeof(optionsDesc));
+    SkASSERT(SUCCEEDED(hr));
+
+    D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2Desc = {};
+    hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2Desc,
+                                     sizeof(options2Desc));
+    SkASSERT(SUCCEEDED(hr));
+
+    // See https://docs.microsoft.com/en-us/windows/win32/direct3d12/hardware-support
+    if (D3D12_RESOURCE_BINDING_TIER_1 == optionsDesc.ResourceBindingTier) {
+        fMaxPerStageShaderResourceViews = 128;
+        if (D3D_FEATURE_LEVEL_11_0 == flDesc.MaxSupportedFeatureLevel) {
+            fMaxPerStageUnorderedAccessViews = 8;
+        } else {
+            fMaxPerStageUnorderedAccessViews = 64;
+        }
+    } else {
+        // The doc above says "full heap", but practically it seems like it should be
+        // limited by the maximum number of samplers in a heap
+        fMaxPerStageUnorderedAccessViews = 2032;
+        fMaxPerStageShaderResourceViews = 2032;
+    }
+
+    this->initGrCaps(optionsDesc, options2Desc);
+    this->initShaderCaps(adapterDesc.VendorId, optionsDesc);
+
+    // TODO: set up formats and stencil
+
+    if (!contextOptions.fDisableDriverCorrectnessWorkarounds) {
+        this->applyDriverCorrectnessWorkarounds(adapterDesc.VendorId);
+    }

     this->finishInitialization(contextOptions);
 }
 
+void GrD3DCaps::initGrCaps(const D3D12_FEATURE_DATA_D3D12_OPTIONS& optionsDesc,
+                           const D3D12_FEATURE_DATA_D3D12_OPTIONS2& options2Desc) {
+    // There doesn't seem to be a property for this, and setting it to MAXINT makes tests which test
+    // all the vertex attribs time out looping over that many. For now, we'll cap this at 64 max and
+    // can raise it if we ever find that need.
+    fMaxVertexAttributes = 64;

+    // TODO: we can set locations but not sure if we can query them
+    fSampleLocationsSupport = false;
+
+    if (D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED !=
+            options2Desc.ProgrammableSamplePositionsTier) {
+        // We "disable" multisample by colocating all samples at pixel center.
+        fMultisampleDisableSupport = true;
+    }
+
+    // TODO: It's not clear if this is supported or not.
+    fMixedSamplesSupport = false;
+
+    if (D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED !=
+            optionsDesc.ConservativeRasterizationTier) {
+        fConservativeRasterSupport = true;
+    }
+
+    fWireframeSupport = true;
+
+    // Feature level 11_0 and up support up to 16K in texture dimension
+    fMaxTextureSize = 16384;
+    // There's no specific cap for RT size, so use texture size
+    fMaxRenderTargetSize = fMaxTextureSize;
+    if (fDriverBugWorkarounds.max_texture_size_limit_4096) {
+        fMaxTextureSize = std::min(fMaxTextureSize, 4096);
+    }
+    // Our render targets are always created with textures as the color attachment, so
+    // re-sync the RT limit with the texture limit, which may have just been clamped to 4096.
+    fMaxRenderTargetSize = fMaxTextureSize;
+
+    fMaxPreferredRenderTargetSize = fMaxRenderTargetSize;
+
+    // Assuming since we will always map in the end to upload the data we might as well just map
+    // from the get go. There is no hard data to suggest this is faster or slower.
+    fBufferMapThreshold = 0;
+
+    fMapBufferFlags = kCanMap_MapFlag | kSubset_MapFlag | kAsyncRead_MapFlag;
+
+    fOversizedStencilSupport = true;
+
+    // Advanced blend modes don't appear to be supported.
+}
+
+void GrD3DCaps::initShaderCaps(int vendorID, const D3D12_FEATURE_DATA_D3D12_OPTIONS& optionsDesc) {
+    GrShaderCaps* shaderCaps = fShaderCaps.get();
+    shaderCaps->fVersionDeclString = "#version 330\n";
+
+    // Shader Model 5 supports all of the following:
+    shaderCaps->fUsesPrecisionModifiers = true;
+    shaderCaps->fFlatInterpolationSupport = true;
+    // Flat interpolation appears to be slow on Qualcomm GPUs. This was tested in GL and is assumed
+    // to be true with D3D as well.
+    shaderCaps->fPreferFlatInterpolation = kQualcomm_D3DVendor != vendorID;
+
+    shaderCaps->fSampleMaskSupport = true;
+
+    shaderCaps->fShaderDerivativeSupport = true;
+
+    shaderCaps->fGeometryShaderSupport = shaderCaps->fGSInvocationsSupport = true;
+
+    shaderCaps->fDualSourceBlendingSupport = true;
+
+    shaderCaps->fIntegerSupport = true;
+    shaderCaps->fVertexIDSupport = true;
+    shaderCaps->fFPManipulationSupport = true;
+
+    shaderCaps->fFloatIs32Bits = true;
+    shaderCaps->fHalfIs32Bits =
+        D3D12_SHADER_MIN_PRECISION_SUPPORT_NONE == optionsDesc.MinPrecisionSupport;
+
+    // See https://docs.microsoft.com/en-us/windows/win32/direct3d12/hardware-support
+    // The maximum number of samplers in a shader-visible descriptor heap is 2048, but
+    // 16 of those are reserved for the driver.
+    shaderCaps->fMaxFragmentSamplers =
+        (D3D12_RESOURCE_BINDING_TIER_1 == optionsDesc.ResourceBindingTier) ? 16 : 2032;
+}
+
+void GrD3DCaps::applyDriverCorrectnessWorkarounds(int vendorID) {
+    // Nothing yet.
+}
 
 bool GrD3DCaps::isFormatSRGB(const GrBackendFormat& format) const {
     // TODO
@@ -74,7 +213,7 @@
 }
 
 bool GrD3DCaps::isFormatTexturableAndUploadable(GrColorType ct,
-                                               const GrBackendFormat& format) const {
+                                                const GrBackendFormat& format) const {
     // TODO
     return false;
 }
@@ -85,7 +224,7 @@
 }
 
 bool GrD3DCaps::isFormatAsColorTypeRenderable(GrColorType ct, const GrBackendFormat& format,
-                                             int sampleCount) const {
+                                              int sampleCount) const {
     if (!this->isFormatRenderable(format, sampleCount)) {
         return false;
     }
@@ -99,7 +238,7 @@
 }
 
 int GrD3DCaps::getRenderTargetSampleCount(int requestedCount,
-                                         const GrBackendFormat& format) const {
+                                          const GrBackendFormat& format) const {
     // TODO
     return 0;
 }
@@ -115,8 +254,8 @@
 }
 
 GrCaps::SupportedWrite GrD3DCaps::supportedWritePixelsColorType(GrColorType surfaceColorType,
-                                                               const GrBackendFormat& surfaceFormat,
-                                                               GrColorType srcColorType) const {
+                                                                const GrBackendFormat& surfaceFormat,
+                                                                GrColorType srcColorType) const {
     // TODO
     return {GrColorType::kUnknown, 0};
 }
@@ -136,19 +275,19 @@
 }
 
 bool GrD3DCaps::onAreColorTypeAndFormatCompatible(GrColorType ct,
-                                                 const GrBackendFormat& format) const {
+                                                  const GrBackendFormat& format) const {
     // TODO
     return false;
 }
 
 GrColorType GrD3DCaps::getYUVAColorTypeFromBackendFormat(const GrBackendFormat& format,
-                                                        bool isAlphaChannel) const {
+                                                         bool isAlphaChannel) const {
     // TODO
     return GrColorType::kUnknown;
 }
 
 GrBackendFormat GrD3DCaps::onGetDefaultBackendFormat(GrColorType ct,
-                                                    GrRenderable renderable) const {
+                                                     GrRenderable renderable) const {
     // TODO
     return GrBackendFormat();
 }
@@ -182,15 +321,16 @@
 }
 
 void GrD3DCaps::addExtraSamplerKey(GrProcessorKeyBuilder* b,
-                                  GrSamplerState samplerState,
-                                  const GrBackendFormat& format) const {
+                                   GrSamplerState samplerState,
+                                   const GrBackendFormat& format) const {
     // TODO
 }
 
 /**
- * TODO: Determin what goes in the ProgramDesc
+ * TODO: Determine what goes in the ProgramDesc
  */
-GrProgramDesc GrD3DCaps::makeDesc(const GrRenderTarget* rt, const GrProgramInfo& programInfo) const {
+GrProgramDesc GrD3DCaps::makeDesc(const GrRenderTarget* rt,
+                                  const GrProgramInfo& programInfo) const {
     GrProgramDesc desc;
     if (!GrProgramDesc::Build(&desc, rt, programInfo, *this)) {
         SkASSERT(!desc.isValid());
diff --git a/src/gpu/d3d/GrD3DCaps.h b/src/gpu/d3d/GrD3DCaps.h
index 7efaa2a1..8be8301 100644
--- a/src/gpu/d3d/GrD3DCaps.h
+++ b/src/gpu/d3d/GrD3DCaps.h
@@ -8,6 +8,7 @@
 #ifndef GrD3DCaps_DEFINED
 #define GrD3DCaps_DEFINED
 
+#include "include/gpu/d3d/GrD3D12.h"
 #include "src/gpu/GrCaps.h"
 
 class GrShaderCaps;
@@ -21,7 +22,7 @@
      * Creates a GrD3DCaps that is set such that nothing is supported. The init function should
      * be called to fill out the caps.
      */
-    GrD3DCaps(const GrContextOptions& contextOptions, GrProtected isProtected = GrProtected::kNo);
+    GrD3DCaps(const GrContextOptions& contextOptions, IDXGIAdapter1*, ID3D12Device*);
 
     bool isFormatSRGB(const GrBackendFormat&) const override;
     SkImage::CompressionType compressionType(const GrBackendFormat&) const override;
@@ -68,7 +69,22 @@
 #endif
 
 private:
-    void init(const GrContextOptions& contextOptions);
+    enum D3DVendor {
+        kAMD_D3DVendor = 0x1002,
+        kARM_D3DVendor = 0x13B5,
+        kImagination_D3DVendor = 0x1010,
+        kIntel_D3DVendor = 0x8086,
+        kNVIDIA_D3DVendor = 0x10DE,
+        kQualcomm_D3DVendor = 0x5143,
+    };
+
+    void init(const GrContextOptions& contextOptions, IDXGIAdapter1*, ID3D12Device*);
+
+    void initGrCaps(const D3D12_FEATURE_DATA_D3D12_OPTIONS&,
+                    const D3D12_FEATURE_DATA_D3D12_OPTIONS2&);
+    void initShaderCaps(int vendorID, const D3D12_FEATURE_DATA_D3D12_OPTIONS& optionsDesc);
+
+    void applyDriverCorrectnessWorkarounds(int vendorID);
 
     bool onSurfaceSupportsWritePixels(const GrSurface*) const override;
     bool onCanCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
@@ -80,6 +96,9 @@
     SupportedRead onSupportedReadPixelsColorType(GrColorType, const GrBackendFormat&,
                                                  GrColorType) const override;
 
+    int fMaxPerStageShaderResourceViews;
+    int fMaxPerStageUnorderedAccessViews;
+
     typedef GrCaps INHERITED;
 };
 
diff --git a/src/gpu/d3d/GrD3DGpu.cpp b/src/gpu/d3d/GrD3DGpu.cpp
index 5a78910..579b4c1 100644
--- a/src/gpu/d3d/GrD3DGpu.cpp
+++ b/src/gpu/d3d/GrD3DGpu.cpp
@@ -19,7 +19,9 @@
         , fDevice(backendContext.fDevice)
         , fQueue(backendContext.fQueue)
         , fProtectedContext(backendContext.fProtectedContext) {
-    fCaps.reset(new GrD3DCaps(contextOptions));
+    fCaps.reset(new GrD3DCaps(contextOptions,
+                              backendContext.fAdapter.Get(),
+                              backendContext.fDevice.Get()));
 }
 
 GrOpsRenderPass* GrD3DGpu::getOpsRenderPass(
diff --git a/src/gpu/dawn/GrDawnOpsRenderPass.cpp b/src/gpu/dawn/GrDawnOpsRenderPass.cpp
index 504a231..fa35169 100644
--- a/src/gpu/dawn/GrDawnOpsRenderPass.cpp
+++ b/src/gpu/dawn/GrDawnOpsRenderPass.cpp
@@ -120,7 +120,7 @@
 
 void GrDawnOpsRenderPass::setScissorState(const GrProgramInfo& programInfo) {
     SkIRect rect;
-    if (programInfo.pipeline().isScissorEnabled()) {
+    if (programInfo.pipeline().isScissorTestEnabled()) {
         constexpr SkIRect kBogusScissor{0, 0, 1, 1};
         rect = programInfo.hasFixedScissor() ? programInfo.fixedScissor() : kBogusScissor;
         if (kBottomLeft_GrSurfaceOrigin == fOrigin) {
@@ -150,31 +150,25 @@
 
 bool GrDawnOpsRenderPass::onBindPipeline(const GrProgramInfo& programInfo,
                                          const SkRect& drawBounds) {
+    fCurrentProgram = fGpu->getOrCreateRenderPipeline(fRenderTarget, programInfo);
+    this->applyState(fCurrentProgram.get(), programInfo);
     return true;
 }
 
-void GrDawnOpsRenderPass::onDrawMeshes(const GrProgramInfo& programInfo,
-                                       const GrMesh meshes[],
-                                       int meshCount) {
-    if (!meshCount) {
-        return;
-    }
-    sk_sp<GrDawnProgram> program = fGpu->getOrCreateRenderPipeline(fRenderTarget, programInfo);
-    if (!programInfo.hasDynamicPrimProcTextures()) {
-        auto textures = programInfo.hasFixedPrimProcTextures() ? programInfo.fixedPrimProcTextures()
-                                                               : nullptr;
-        auto bindGroup = program->setTextures(fGpu, programInfo, textures);
-        fPassEncoder.SetBindGroup(1, bindGroup, 0, nullptr);
-    }
-    for (int i = 0; i < meshCount; ++i) {
-        if (programInfo.hasDynamicPrimProcTextures()) {
-            auto textures = programInfo.dynamicPrimProcTextures(i);
-            auto bindGroup = program->setTextures(fGpu, programInfo, textures);
-            fPassEncoder.SetBindGroup(1, bindGroup, 0, nullptr);
-        }
-        this->applyState(program.get(), programInfo);
-        meshes[i].sendToGpu(programInfo.primitiveType(), this);
-    }
+void GrDawnOpsRenderPass::onSetScissorRect(const SkIRect&) {
+}
+
+bool GrDawnOpsRenderPass::onBindTextures(const GrPrimitiveProcessor& primProc,
+                                         const GrPipeline& pipeline,
+                                         const GrSurfaceProxy* const textures[]) {
+    auto bindGroup = fCurrentProgram->setTextures(fGpu, primProc, pipeline, textures);
+    fPassEncoder.SetBindGroup(1, bindGroup, 0, nullptr);
+    return true;
+}
+
+void GrDawnOpsRenderPass::onDrawMesh(GrPrimitiveType primitiveType,
+                                     const GrMesh& mesh) {
+    mesh.sendToGpu(primitiveType, this);
 }
 
 void GrDawnOpsRenderPass::sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh& mesh,
diff --git a/src/gpu/dawn/GrDawnOpsRenderPass.h b/src/gpu/dawn/GrDawnOpsRenderPass.h
index 95c6dde..51984d4 100644
--- a/src/gpu/dawn/GrDawnOpsRenderPass.h
+++ b/src/gpu/dawn/GrDawnOpsRenderPass.h
@@ -42,9 +42,10 @@
     void applyState(GrDawnProgram*, const GrProgramInfo& programInfo);
 
     bool onBindPipeline(const GrProgramInfo& programInfo, const SkRect& drawBounds) override;
-    void onDrawMeshes(const GrProgramInfo& programInfo,
-                      const GrMesh mesh[],
-                      int meshCount) override;
+    void onSetScissorRect(const SkIRect&) override;
+    bool onBindTextures(const GrPrimitiveProcessor&, const GrPipeline&,
+                        const GrSurfaceProxy* const primProcTextures[]) override;
+    void onDrawMesh(GrPrimitiveType, const GrMesh&) override;
 
     void sendArrayMeshToGpu(GrPrimitiveType type, const GrMesh& mesh, int vertexCount,
                             int baseVertex) final {
@@ -78,6 +79,7 @@
     GrDawnGpu*                  fGpu;
     wgpu::CommandEncoder        fEncoder;
     wgpu::RenderPassEncoder     fPassEncoder;
+    sk_sp<GrDawnProgram>        fCurrentProgram;
     LoadAndStoreInfo            fColorInfo;
 
     typedef GrOpsRenderPass     INHERITED;
diff --git a/src/gpu/dawn/GrDawnProgramBuilder.cpp b/src/gpu/dawn/GrDawnProgramBuilder.cpp
index 1239905..9c8b9ec 100644
--- a/src/gpu/dawn/GrDawnProgramBuilder.cpp
+++ b/src/gpu/dawn/GrDawnProgramBuilder.cpp
@@ -536,12 +536,11 @@
 }
 
 wgpu::BindGroup GrDawnProgram::setTextures(GrDawnGpu* gpu,
-                                           const GrProgramInfo& programInfo,
+                                           const GrPrimitiveProcessor& primProc,
+                                           const GrPipeline& pipeline,
                                            const GrSurfaceProxy* const primProcTextures[]) {
     std::vector<wgpu::BindGroupBinding> bindings;
     int binding = 0;
-    const GrPipeline& pipeline = programInfo.pipeline();
-    const GrPrimitiveProcessor& primProc = programInfo.primProc();
     if (primProcTextures) {
         for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
             SkASSERT(primProcTextures[i]->asTextureProxy());
diff --git a/src/gpu/dawn/GrDawnProgramBuilder.h b/src/gpu/dawn/GrDawnProgramBuilder.h
index 004ab1f..42d1391 100644
--- a/src/gpu/dawn/GrDawnProgramBuilder.h
+++ b/src/gpu/dawn/GrDawnProgramBuilder.h
@@ -66,7 +66,8 @@
     void setRenderTargetState(const GrRenderTarget*, GrSurfaceOrigin);
     wgpu::BindGroup setUniformData(GrDawnGpu*, const GrRenderTarget*, const GrProgramInfo&);
     wgpu::BindGroup setTextures(GrDawnGpu* gpu,
-                                const GrProgramInfo& programInfo,
+                                const GrPrimitiveProcessor& primProc,
+                                const GrPipeline& pipeline,
                                 const GrSurfaceProxy* const primProcTextures[]);
 };
 
diff --git a/src/gpu/effects/GrDeviceSpaceEffect.fp b/src/gpu/effects/GrDeviceSpaceEffect.fp
new file mode 100644
index 0000000..d817ec2
--- /dev/null
+++ b/src/gpu/effects/GrDeviceSpaceEffect.fp
@@ -0,0 +1,22 @@
+/*
+* Copyright 2020 Google LLC
+*
+* Use of this source code is governed by a BSD-style license that can be
+* found in the LICENSE file.
+*/
+
+in fragmentProcessor fp;
+
+void main() {
+     sk_OutColor = sample(fp, sk_InColor, sk_FragCoord.xy);
+}
+
+@test(d) {
+    std::unique_ptr<GrFragmentProcessor> fp;
+    // We have a restriction that explicit coords only work for FPs with exactly one
+    // coord transform.
+    do {
+        fp = GrProcessorUnitTest::MakeChildFP(d);
+    } while (fp->numCoordTransforms() != 1);
+    return GrDeviceSpaceEffect::Make(std::move(fp));
+}
diff --git a/src/gpu/effects/GrTextureDomain.cpp b/src/gpu/effects/GrTextureDomain.cpp
index 59c3dfa..9d8202c 100644
--- a/src/gpu/effects/GrTextureDomain.cpp
+++ b/src/gpu/effects/GrTextureDomain.cpp
@@ -314,123 +314,3 @@
         std::copy_n(decalFilterWeights, 3, fPrevDeclFilterWeights);
     }
 }
-
-///////////////////////////////////////////////////////////////////////////////
-
-std::unique_ptr<GrFragmentProcessor> GrDeviceSpaceTextureDecalFragmentProcessor::Make(
-        GrSurfaceProxyView view, const SkIRect& subset, const SkIPoint& deviceSpaceOffset) {
-    return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceTextureDecalFragmentProcessor(
-            std::move(view), subset, deviceSpaceOffset));
-}
-
-GrDeviceSpaceTextureDecalFragmentProcessor::GrDeviceSpaceTextureDecalFragmentProcessor(
-        GrSurfaceProxyView view, const SkIRect& subset, const SkIPoint& deviceSpaceOffset)
-        : INHERITED(kGrDeviceSpaceTextureDecalFragmentProcessor_ClassID,
-                    kCompatibleWithCoverageAsAlpha_OptimizationFlag)
-        , fTextureDomain(view.proxy(),
-                         GrTextureDomain::MakeTexelDomain(subset, GrTextureDomain::kDecal_Mode),
-                         GrTextureDomain::kDecal_Mode, GrTextureDomain::kDecal_Mode)
-        , fTextureSampler(std::move(view), GrSamplerState::Filter::kNearest) {
-    this->setTextureSamplerCnt(1);
-    fDeviceSpaceOffset.fX = deviceSpaceOffset.fX - subset.fLeft;
-    fDeviceSpaceOffset.fY = deviceSpaceOffset.fY - subset.fTop;
-}
-
-GrDeviceSpaceTextureDecalFragmentProcessor::GrDeviceSpaceTextureDecalFragmentProcessor(
-        const GrDeviceSpaceTextureDecalFragmentProcessor& that)
-        : INHERITED(kGrDeviceSpaceTextureDecalFragmentProcessor_ClassID,
-                    kCompatibleWithCoverageAsAlpha_OptimizationFlag)
-        , fTextureDomain(that.fTextureDomain)
-        , fTextureSampler(that.fTextureSampler)
-        , fDeviceSpaceOffset(that.fDeviceSpaceOffset) {
-    this->setTextureSamplerCnt(1);
-}
-
-std::unique_ptr<GrFragmentProcessor> GrDeviceSpaceTextureDecalFragmentProcessor::clone() const {
-    return std::unique_ptr<GrFragmentProcessor>(
-            new GrDeviceSpaceTextureDecalFragmentProcessor(*this));
-}
-
-GrGLSLFragmentProcessor* GrDeviceSpaceTextureDecalFragmentProcessor::onCreateGLSLInstance() const  {
-    class GLSLProcessor : public GrGLSLFragmentProcessor {
-    public:
-        void emitCode(EmitArgs& args) override {
-            const GrDeviceSpaceTextureDecalFragmentProcessor& dstdfp =
-                    args.fFp.cast<GrDeviceSpaceTextureDecalFragmentProcessor>();
-            const char* scaleAndTranslateName;
-            fScaleAndTranslateUni = args.fUniformHandler->addUniform(kFragment_GrShaderFlag,
-                                                                     kHalf4_GrSLType,
-                                                                     "scaleAndTranslate",
-                                                                     &scaleAndTranslateName);
-            args.fFragBuilder->codeAppendf("half2 coords = half2(sk_FragCoord.xy * %s.xy + %s.zw);",
-                                           scaleAndTranslateName, scaleAndTranslateName);
-            fGLDomain.sampleTexture(args.fFragBuilder,
-                                    args.fUniformHandler,
-                                    args.fShaderCaps,
-                                    dstdfp.fTextureDomain,
-                                    args.fOutputColor,
-                                    SkString("coords"),
-                                    args.fTexSamplers[0],
-                                    args.fInputColor);
-        }
-
-    protected:
-        void onSetData(const GrGLSLProgramDataManager& pdman,
-                       const GrFragmentProcessor& fp) override {
-            const GrDeviceSpaceTextureDecalFragmentProcessor& dstdfp =
-                    fp.cast<GrDeviceSpaceTextureDecalFragmentProcessor>();
-            const auto& view = dstdfp.textureSampler(0).view();
-            SkISize textureDims = view.proxy()->backingStoreDimensions();
-
-            fGLDomain.setData(pdman, dstdfp.fTextureDomain, view,
-                              dstdfp.textureSampler(0).samplerState());
-            float iw = 1.f / textureDims.width();
-            float ih = 1.f / textureDims.height();
-            float scaleAndTransData[4] = {
-                iw, ih,
-                -dstdfp.fDeviceSpaceOffset.fX * iw, -dstdfp.fDeviceSpaceOffset.fY * ih
-            };
-            if (view.origin() == kBottomLeft_GrSurfaceOrigin) {
-                scaleAndTransData[1] = -scaleAndTransData[1];
-                scaleAndTransData[3] = 1 - scaleAndTransData[3];
-            }
-            pdman.set4fv(fScaleAndTranslateUni, 1, scaleAndTransData);
-        }
-
-    private:
-        GrTextureDomain::GLDomain   fGLDomain;
-        UniformHandle               fScaleAndTranslateUni;
-    };
-
-    return new GLSLProcessor;
-}
-
-bool GrDeviceSpaceTextureDecalFragmentProcessor::onIsEqual(const GrFragmentProcessor& fp) const {
-    const GrDeviceSpaceTextureDecalFragmentProcessor& dstdfp =
-            fp.cast<GrDeviceSpaceTextureDecalFragmentProcessor>();
-    return dstdfp.fTextureSampler.view().proxy()->underlyingUniqueID() ==
-                   fTextureSampler.view().proxy()->underlyingUniqueID() &&
-           dstdfp.fDeviceSpaceOffset == fDeviceSpaceOffset &&
-           dstdfp.fTextureDomain == fTextureDomain;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrDeviceSpaceTextureDecalFragmentProcessor);
-
-#if GR_TEST_UTILS
-std::unique_ptr<GrFragmentProcessor> GrDeviceSpaceTextureDecalFragmentProcessor::TestCreate(
-        GrProcessorTestData* d) {
-    auto [view, at, ct] = d->randomView();
-    SkIRect subset;
-    subset.fLeft = d->fRandom->nextULessThan(view.width() - 1);
-    subset.fRight = d->fRandom->nextRangeU(subset.fLeft, view.width());
-    subset.fTop = d->fRandom->nextULessThan(view.height() - 1);
-    subset.fBottom = d->fRandom->nextRangeU(subset.fTop, view.height());
-    SkIPoint pt;
-    pt.fX = d->fRandom->nextULessThan(2048);
-    pt.fY = d->fRandom->nextULessThan(2048);
-
-    return GrDeviceSpaceTextureDecalFragmentProcessor::Make(std::move(view), subset, pt);
-}
-#endif
diff --git a/src/gpu/effects/GrTextureDomain.h b/src/gpu/effects/GrTextureDomain.h
index c9a563a..e88c4c2 100644
--- a/src/gpu/effects/GrTextureDomain.h
+++ b/src/gpu/effects/GrTextureDomain.h
@@ -231,47 +231,4 @@
     int     fIndex;
 };
 
-class GrDeviceSpaceTextureDecalFragmentProcessor : public GrFragmentProcessor {
-public:
-    static std::unique_ptr<GrFragmentProcessor> Make(GrSurfaceProxyView,
-                                                     const SkIRect& subset,
-                                                     const SkIPoint& deviceSpaceOffset);
-
-    const char* name() const override { return "GrDeviceSpaceTextureDecalFragmentProcessor"; }
-
-#ifdef SK_DEBUG
-    SkString dumpInfo() const override {
-        SkString str;
-        str.appendf("Domain: [L: %.2f, T: %.2f, R: %.2f, B: %.2f] Offset: [%d %d]",
-                    fTextureDomain.domain().fLeft, fTextureDomain.domain().fTop,
-                    fTextureDomain.domain().fRight, fTextureDomain.domain().fBottom,
-                    fDeviceSpaceOffset.fX, fDeviceSpaceOffset.fY);
-        str.append(INHERITED::dumpInfo());
-        return str;
-    }
-#endif
-
-    std::unique_ptr<GrFragmentProcessor> clone() const override;
-
-private:
-    GrTextureDomain fTextureDomain;
-    TextureSampler fTextureSampler;
-    SkIPoint fDeviceSpaceOffset;
-
-    GrDeviceSpaceTextureDecalFragmentProcessor(GrSurfaceProxyView, const SkIRect&, const SkIPoint&);
-    GrDeviceSpaceTextureDecalFragmentProcessor(const GrDeviceSpaceTextureDecalFragmentProcessor&);
-
-    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
-
-    // Since we always use decal mode, there is no need for key data.
-    void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}
-
-    bool onIsEqual(const GrFragmentProcessor& fp) const override;
-
-    const TextureSampler& onTextureSampler(int) const override { return fTextureSampler; }
-
-    GR_DECLARE_FRAGMENT_PROCESSOR_TEST
-
-    typedef GrFragmentProcessor INHERITED;
-};
 #endif
diff --git a/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp b/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp
new file mode 100644
index 0000000..8df3a1b
--- /dev/null
+++ b/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/**************************************************************************************************
+ *** This file was autogenerated from GrDeviceSpaceEffect.fp; do not modify.
+ **************************************************************************************************/
+#include "GrDeviceSpaceEffect.h"
+
+#include "include/gpu/GrTexture.h"
+#include "src/gpu/glsl/GrGLSLFragmentProcessor.h"
+#include "src/gpu/glsl/GrGLSLFragmentShaderBuilder.h"
+#include "src/gpu/glsl/GrGLSLProgramBuilder.h"
+#include "src/sksl/SkSLCPP.h"
+#include "src/sksl/SkSLUtil.h"
+class GrGLSLDeviceSpaceEffect : public GrGLSLFragmentProcessor {
+public:
+    GrGLSLDeviceSpaceEffect() {}
+    void emitCode(EmitArgs& args) override {
+        GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
+        const GrDeviceSpaceEffect& _outer = args.fFp.cast<GrDeviceSpaceEffect>();
+        (void)_outer;
+        SkString _input204 = SkStringPrintf("%s", args.fInputColor);
+        SkString _sample204;
+        SkString _coords204("sk_FragCoord.xy");
+        _sample204 =
+                this->invokeChild(_outer.fp_index, _input204.c_str(), args, _coords204.c_str());
+        fragBuilder->codeAppendf("%s = %s;\n", args.fOutputColor, _sample204.c_str());
+    }
+
+private:
+    void onSetData(const GrGLSLProgramDataManager& pdman,
+                   const GrFragmentProcessor& _proc) override {}
+};
+GrGLSLFragmentProcessor* GrDeviceSpaceEffect::onCreateGLSLInstance() const {
+    return new GrGLSLDeviceSpaceEffect();
+}
+void GrDeviceSpaceEffect::onGetGLSLProcessorKey(const GrShaderCaps& caps,
+                                                GrProcessorKeyBuilder* b) const {}
+bool GrDeviceSpaceEffect::onIsEqual(const GrFragmentProcessor& other) const {
+    const GrDeviceSpaceEffect& that = other.cast<GrDeviceSpaceEffect>();
+    (void)that;
+    return true;
+}
+GrDeviceSpaceEffect::GrDeviceSpaceEffect(const GrDeviceSpaceEffect& src)
+        : INHERITED(kGrDeviceSpaceEffect_ClassID, src.optimizationFlags()), fp_index(src.fp_index) {
+    {
+        auto clone = src.childProcessor(fp_index).clone();
+        clone->setSampledWithExplicitCoords(
+                src.childProcessor(fp_index).isSampledWithExplicitCoords());
+        this->registerChildProcessor(std::move(clone));
+    }
+}
+std::unique_ptr<GrFragmentProcessor> GrDeviceSpaceEffect::clone() const {
+    return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceEffect(*this));
+}
+GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrDeviceSpaceEffect);
+#if GR_TEST_UTILS
+std::unique_ptr<GrFragmentProcessor> GrDeviceSpaceEffect::TestCreate(GrProcessorTestData* d) {
+    std::unique_ptr<GrFragmentProcessor> fp;
+    // We have a restriction that explicit coords only work for FPs with exactly one
+    // coord transform.
+    do {
+        fp = GrProcessorUnitTest::MakeChildFP(d);
+    } while (fp->numCoordTransforms() != 1);
+    return GrDeviceSpaceEffect::Make(std::move(fp));
+}
+#endif
diff --git a/src/gpu/effects/generated/GrDeviceSpaceEffect.h b/src/gpu/effects/generated/GrDeviceSpaceEffect.h
new file mode 100644
index 0000000..d0e9253
--- /dev/null
+++ b/src/gpu/effects/generated/GrDeviceSpaceEffect.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/**************************************************************************************************
+ *** This file was autogenerated from GrDeviceSpaceEffect.fp; do not modify.
+ **************************************************************************************************/
+#ifndef GrDeviceSpaceEffect_DEFINED
+#define GrDeviceSpaceEffect_DEFINED
+#include "include/core/SkTypes.h"
+#include "include/core/SkM44.h"
+
+#include "src/gpu/GrCoordTransform.h"
+#include "src/gpu/GrFragmentProcessor.h"
+class GrDeviceSpaceEffect : public GrFragmentProcessor {
+public:
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> fp) {
+        return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceEffect(std::move(fp)));
+    }
+    GrDeviceSpaceEffect(const GrDeviceSpaceEffect& src);
+    std::unique_ptr<GrFragmentProcessor> clone() const override;
+    const char* name() const override { return "DeviceSpaceEffect"; }
+    int fp_index = -1;
+
+private:
+    GrDeviceSpaceEffect(std::unique_ptr<GrFragmentProcessor> fp)
+            : INHERITED(kGrDeviceSpaceEffect_ClassID, kNone_OptimizationFlags) {
+        SkASSERT(fp);
+        fp_index = this->numChildProcessors();
+        fp->setSampledWithExplicitCoords(true);
+        this->registerChildProcessor(std::move(fp));
+    }
+    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
+    void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
+    bool onIsEqual(const GrFragmentProcessor&) const override;
+    GR_DECLARE_FRAGMENT_PROCESSOR_TEST
+    typedef GrFragmentProcessor INHERITED;
+};
+#endif
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 55d7719..55d8ae0 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -2323,19 +2323,14 @@
     }
 }
 
-void GrGLGpu::drawMesh(GrRenderTarget* renderTarget, GrPrimitiveType primitiveType,
-                       const GrMesh& mesh) {
+GrGLenum GrGLGpu::prepareToDraw(GrPrimitiveType primitiveType) {
     if (this->glCaps().requiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines() &&
         GrIsPrimTypeLines(primitiveType) && !GrIsPrimTypeLines(fLastPrimitiveType)) {
         GL_CALL(Enable(GR_GL_CULL_FACE));
         GL_CALL(Disable(GR_GL_CULL_FACE));
     }
-
-    mesh.sendToGpu(primitiveType, this);
     fLastPrimitiveType = primitiveType;
-}
 
-static GrGLenum gr_primitive_type_to_gl_mode(GrPrimitiveType primitiveType) {
     switch (primitiveType) {
         case GrPrimitiveType::kTriangles:
             return GR_GL_TRIANGLES;
@@ -2356,15 +2351,14 @@
     SK_ABORT("invalid GrPrimitiveType");
 }
 
-void GrGLGpu::sendArrayMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh, int vertexCount,
-                                 int baseVertex) {
-    const GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
+void GrGLGpu::draw(GrPrimitiveType primitiveType, const GrBuffer* vertexBuffer, int vertexCount,
+                   int baseVertex) {
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
     if (this->glCaps().drawArraysBaseVertexIsBroken()) {
-        this->setupGeometry(nullptr, mesh.vertexBuffer(), baseVertex, nullptr, 0,
-                            GrPrimitiveRestart::kNo);
+        this->setupGeometry(nullptr, vertexBuffer, baseVertex, nullptr, 0, GrPrimitiveRestart::kNo);
         GL_CALL(DrawArrays(glPrimType, 0, vertexCount));
     } else {
-        this->setupGeometry(nullptr, mesh.vertexBuffer(), 0, nullptr, 0, GrPrimitiveRestart::kNo);
+        this->setupGeometry(nullptr, vertexBuffer, 0, nullptr, 0, GrPrimitiveRestart::kNo);
         GL_CALL(DrawArrays(glPrimType, baseVertex, vertexCount));
     }
     fStats.incNumDraws();
@@ -2379,15 +2373,13 @@
     }
 }
 
-void GrGLGpu::sendIndexedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                   int indexCount, int baseIndex, uint16_t minIndexValue,
-                                   uint16_t maxIndexValue, int baseVertex) {
-    const GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
-    const GrGLvoid* elementPtr = element_ptr(mesh.indexBuffer(), baseIndex);
-
-    this->setupGeometry(mesh.indexBuffer(), mesh.vertexBuffer(), baseVertex, nullptr, 0,
-                        mesh.primitiveRestart());
-
+void GrGLGpu::drawIndexed(GrPrimitiveType primitiveType, const GrBuffer* indexBuffer,
+                          int indexCount, int baseIndex, GrPrimitiveRestart primitiveRestart,
+                          uint16_t minIndexValue, uint16_t maxIndexValue,
+                          const GrBuffer* vertexBuffer, int baseVertex) {
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
+    const GrGLvoid* elementPtr = element_ptr(indexBuffer, baseIndex);
+    this->setupGeometry(indexBuffer, vertexBuffer, baseVertex, nullptr, 0, primitiveRestart);
     if (this->glCaps().drawRangeElementsSupport()) {
         GL_CALL(DrawRangeElements(glPrimType, minIndexValue, maxIndexValue, indexCount,
                                   GR_GL_UNSIGNED_SHORT, elementPtr));
@@ -2397,29 +2389,30 @@
     fStats.incNumDraws();
 }
 
-void GrGLGpu::sendInstancedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                     int vertexCount, int baseVertex, int instanceCount,
-                                     int baseInstance) {
-    GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
+void GrGLGpu::drawInstanced(GrPrimitiveType primitiveType, const GrBuffer* instanceBuffer,
+                            int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                            int vertexCount, int baseVertex) {
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
     int maxInstances = this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
     for (int i = 0; i < instanceCount; i += maxInstances) {
-        this->setupGeometry(nullptr, mesh.vertexBuffer(), 0, mesh.instanceBuffer(),
-                            baseInstance + i, GrPrimitiveRestart::kNo);
+        this->setupGeometry(nullptr, vertexBuffer, 0, instanceBuffer, baseInstance + i,
+                            GrPrimitiveRestart::kNo);
         GL_CALL(DrawArraysInstanced(glPrimType, baseVertex, vertexCount,
                                     std::min(instanceCount - i, maxInstances)));
         fStats.incNumDraws();
     }
 }
 
-void GrGLGpu::sendIndexedInstancedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                            int indexCount, int baseIndex, int baseVertex,
-                                            int instanceCount, int baseInstance) {
-    const GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
-    const GrGLvoid* elementPtr = element_ptr(mesh.indexBuffer(), baseIndex);
+void GrGLGpu::drawIndexedInstanced(
+        GrPrimitiveType primitiveType, const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+        GrPrimitiveRestart primitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+        int baseInstance, const GrBuffer* vertexBuffer, int baseVertex) {
+    GrGLenum glPrimType = this->prepareToDraw(primitiveType);
+    const GrGLvoid* elementPtr = element_ptr(indexBuffer, baseIndex);
     int maxInstances = this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
     for (int i = 0; i < instanceCount; i += maxInstances) {
-        this->setupGeometry(mesh.indexBuffer(), mesh.vertexBuffer(), baseVertex,
-                            mesh.instanceBuffer(), baseInstance + i, mesh.primitiveRestart());
+        this->setupGeometry(indexBuffer, vertexBuffer, baseVertex,
+                            instanceBuffer, baseInstance + i, primitiveRestart);
         GL_CALL(DrawElementsInstanced(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT, elementPtr,
                                       std::min(instanceCount - i, maxInstances)));
         fStats.incNumDraws();
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index de2c53f..f6cc07c 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -31,7 +31,7 @@
 class GrPipeline;
 class GrSwizzle;
 
-class GrGLGpu final : public GrGpu, private GrMesh::SendToGpuImpl {
+class GrGLGpu final : public GrGpu {
 public:
     static sk_sp<GrGpu> Make(sk_sp<const GrGLInterface>, const GrContextOptions&, GrContext*);
     ~GrGLGpu() override;
@@ -81,24 +81,18 @@
         fHWProgram->bindTextures(primProc, pipeline, primProcTextures);
     }
 
-    // The GrGLOpsRenderPass does not buffer up draws before submitting them to the gpu.
-    // Thus this is the implementation of the draw call for the corresponding passthrough function
-    // on GrGLOpsRenderPass.
-    //
-    // The client must call flushGLState before this method.
-    void drawMesh(GrRenderTarget*, GrPrimitiveType, const GrMesh&);
-
-    // GrMesh::SendToGpuImpl methods. These issue the actual GL draw calls.
-    // Marked final as a hint to the compiler to not use virtual dispatch.
-    void sendArrayMeshToGpu(GrPrimitiveType primitiveType, const GrMesh&, int vertexCount,
-                            int baseVertex) final;
-    void sendIndexedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount, int baseIndex,
-                              uint16_t minIndexValue, uint16_t maxIndexValue, int baseVertex) final;
-    void sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount, int baseVertex,
-                                int instanceCount, int baseInstance) final;
-    void sendIndexedInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount,
-                                       int baseIndex, int baseVertex, int instanceCount,
-                                       int baseInstance) final;
+    // These methods issue draws using the current GL state. The client must call flushGLState,
+    // followed by flushScissorRect and/or bindTextures if applicable, before using these methods.
+    void draw(GrPrimitiveType, const GrBuffer* vertexBuffer, int vertexCount, int baseVertex);
+    void drawIndexed(GrPrimitiveType, const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                     GrPrimitiveRestart, uint16_t minIndexValue, uint16_t maxIndexValue,
+                     const GrBuffer* vertexBuffer, int baseVertex);
+    void drawInstanced(GrPrimitiveType, const GrBuffer* instanceBuffer, int instanceCount, int
+                       baseInstance, const GrBuffer* vertexBuffer, int vertexCount, int baseVertex);
+    void drawIndexedInstanced(GrPrimitiveType, const GrBuffer* indexBuffer, int indexCount,
+                              int baseIndex, GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                              int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                              int baseVertex);
 
     // The GrGLOpsRenderPass does not buffer up draws before submitting them to the gpu.
     // Thus this is the implementation of the clear call for the corresponding passthrough function
@@ -307,6 +301,9 @@
                        int baseInstance,
                        GrPrimitiveRestart);
 
+    // Applies any necessary workarounds and returns the GL primitive type to use in draw calls.
+    GrGLenum prepareToDraw(GrPrimitiveType primitiveType);
+
     void flushBlendAndColorWrite(const GrXferProcessor::BlendInfo& blendInfo, const GrSwizzle&);
 
     bool onFinishFlush(GrSurfaceProxy*[], int n, SkSurface::BackendSurfaceAccess access,
diff --git a/src/gpu/gl/GrGLOpsRenderPass.cpp b/src/gpu/gl/GrGLOpsRenderPass.cpp
index 217fdb0..60af18e 100644
--- a/src/gpu/gl/GrGLOpsRenderPass.cpp
+++ b/src/gpu/gl/GrGLOpsRenderPass.cpp
@@ -9,6 +9,7 @@
 
 #include "src/gpu/GrContextPriv.h"
 #include "src/gpu/GrFixedClip.h"
+#include "src/gpu/GrProgramInfo.h"
 #include "src/gpu/GrRenderTargetPriv.h"
 
 void GrGLOpsRenderPass::set(GrRenderTarget* rt, const SkIRect& contentBounds,
@@ -23,3 +24,58 @@
     fColorLoadAndStoreInfo = colorInfo;
     fStencilLoadAndStoreInfo = stencilInfo;
 }
+
+bool GrGLOpsRenderPass::onBindPipeline(const GrProgramInfo& programInfo,
+                                       const SkRect& drawBounds) {
+    fPrimitiveType = programInfo.primitiveType();
+    return fGpu->flushGLState(fRenderTarget, programInfo);
+}
+
+void GrGLOpsRenderPass::onSetScissorRect(const SkIRect& scissor) {
+    fGpu->flushScissorRect(scissor, fRenderTarget->width(), fRenderTarget->height(), fOrigin);
+}
+
+bool GrGLOpsRenderPass::onBindTextures(const GrPrimitiveProcessor& primProc,
+                                       const GrPipeline& pipeline,
+                                       const GrSurfaceProxy* const primProcTextures[]) {
+    fGpu->bindTextures(primProc, pipeline, primProcTextures);
+    return true;
+}
+
+void GrGLOpsRenderPass::onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) {
+    fGpu->draw(fPrimitiveType, vertexBuffer, vertexCount, baseVertex);
+}
+
+void GrGLOpsRenderPass::onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                      GrPrimitiveRestart primitiveRestart, uint16_t minIndexValue,
+                                      uint16_t maxIndexValue, const GrBuffer* vertexBuffer,
+                                      int baseVertex) {
+    fGpu->drawIndexed(fPrimitiveType, indexBuffer, indexCount, baseIndex, primitiveRestart,
+                      minIndexValue, maxIndexValue, vertexBuffer, baseVertex);
+}
+
+void GrGLOpsRenderPass::onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount,
+                                        int baseInstance, const GrBuffer* vertexBuffer,
+                                        int vertexCount, int baseVertex) {
+    fGpu->drawInstanced(fPrimitiveType, instanceBuffer, instanceCount, baseInstance, vertexBuffer,
+                      vertexCount, baseVertex);
+}
+
+void GrGLOpsRenderPass::onDrawIndexedInstanced(
+        const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+        GrPrimitiveRestart primitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+        int baseInstance, const GrBuffer* vertexBuffer, int baseVertex) {
+    fGpu->drawIndexedInstanced(fPrimitiveType, indexBuffer, indexCount, baseIndex, primitiveRestart,
+                               instanceBuffer, instanceCount, baseInstance, vertexBuffer,
+                               baseVertex);
+}
+
+void GrGLOpsRenderPass::onClear(const GrFixedClip& clip, const SkPMColor4f& color) {
+    fGpu->clear(clip, color, fRenderTarget, fOrigin);
+}
+
+void GrGLOpsRenderPass::onClearStencilClip(const GrFixedClip& clip,
+                                           bool insideStencilMask) {
+    fGpu->clearStencilClip(clip, insideStencilMask, fRenderTarget, fOrigin);
+}
+
diff --git a/src/gpu/gl/GrGLOpsRenderPass.h b/src/gpu/gl/GrGLOpsRenderPass.h
index 9a00aec..e79558d 100644
--- a/src/gpu/gl/GrGLOpsRenderPass.h
+++ b/src/gpu/gl/GrGLOpsRenderPass.h
@@ -49,37 +49,31 @@
 private:
     GrGpu* gpu() override { return fGpu; }
 
-    bool onBindPipeline(const GrProgramInfo& programInfo, const SkRect& drawBounds) override {
-        return fGpu->flushGLState(fRenderTarget, programInfo);
-    }
-
-    void onSetScissorRect(const SkIRect& scissor) override {
-        fGpu->flushScissorRect(scissor, fRenderTarget->width(), fRenderTarget->height(), fOrigin);
-    }
-
+    bool onBindPipeline(const GrProgramInfo& programInfo, const SkRect& drawBounds) override;
+    void onSetScissorRect(const SkIRect& scissor) override;
     bool onBindTextures(const GrPrimitiveProcessor& primProc, const GrPipeline& pipeline,
-                        const GrSurfaceProxy* const primProcTextures[]) override {
-        fGpu->bindTextures(primProc, pipeline, primProcTextures);
-        return true;
-    }
+                        const GrSurfaceProxy* const primProcTextures[]) override;
+    void onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override;
+    void onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                       GrPrimitiveRestart, uint16_t minIndexValue, uint16_t maxIndexValue,
+                       const GrBuffer* vertexBuffer, int baseVertex) override;
+    void onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance,
+                         const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override;
+    void onDrawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                                int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                                int baseVertex) override;
+    void onClear(const GrFixedClip& clip, const SkPMColor4f& color) override;
+    void onClearStencilClip(const GrFixedClip& clip, bool insideStencilMask) override;
 
-    void onDrawMesh(GrPrimitiveType primitiveType, const GrMesh& mesh) override {
-        fGpu->drawMesh(fRenderTarget, primitiveType, mesh);
-    }
-
-    void onClear(const GrFixedClip& clip, const SkPMColor4f& color) override {
-        fGpu->clear(clip, color, fRenderTarget, fOrigin);
-    }
-
-    void onClearStencilClip(const GrFixedClip& clip, bool insideStencilMask) override {
-        fGpu->clearStencilClip(clip, insideStencilMask, fRenderTarget, fOrigin);
-    }
-
-    GrGLGpu*                fGpu;
-    SkIRect                 fContentBounds;
-    LoadAndStoreInfo        fColorLoadAndStoreInfo;
+    GrGLGpu* fGpu;
+    SkIRect fContentBounds;
+    LoadAndStoreInfo fColorLoadAndStoreInfo;
     StencilLoadAndStoreInfo fStencilLoadAndStoreInfo;
 
+    // Per-pipeline state.
+    GrPrimitiveType fPrimitiveType;
+
     typedef GrOpsRenderPass INHERITED;
 };
 
diff --git a/src/gpu/mock/GrMockOpsRenderPass.h b/src/gpu/mock/GrMockOpsRenderPass.h
index d5af562..756a14c 100644
--- a/src/gpu/mock/GrMockOpsRenderPass.h
+++ b/src/gpu/mock/GrMockOpsRenderPass.h
@@ -42,14 +42,32 @@
                         const GrSurfaceProxy* const primProcTextures[]) override {
         return true;
     }
-    void onDrawMesh(GrPrimitiveType, const GrMesh&) override {
-        this->markRenderTargetDirty();
-        ++fNumDraws;
+    void onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override {
+        this->dummyDraw();
+    }
+    void onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                       GrPrimitiveRestart, uint16_t minIndexValue, uint16_t maxIndexValue,
+                       const GrBuffer* vertexBuffer, int baseVertex) override {
+        this->dummyDraw();
+    }
+    void onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance,
+                         const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override {
+        this->dummyDraw();
+    }
+    void onDrawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                                int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                                int baseVertex) override {
+        this->dummyDraw();
     }
     void onClear(const GrFixedClip&, const SkPMColor4f&) override {
         this->markRenderTargetDirty();
     }
     void onClearStencilClip(const GrFixedClip&, bool insideStencilMask) override {}
+    void dummyDraw() {
+        this->markRenderTargetDirty();
+        ++fNumDraws;
+    }
     void markRenderTargetDirty() {
         if (auto* tex = fRenderTarget->asTexture()) {
             tex->texturePriv().markMipMapsDirty();
diff --git a/src/gpu/mtl/GrMtlOpsRenderPass.h b/src/gpu/mtl/GrMtlOpsRenderPass.h
index 9613405..90136fe 100644
--- a/src/gpu/mtl/GrMtlOpsRenderPass.h
+++ b/src/gpu/mtl/GrMtlOpsRenderPass.h
@@ -20,7 +20,7 @@
 class GrMtlPipelineState;
 class GrMtlRenderTarget;
 
-class GrMtlOpsRenderPass : public GrOpsRenderPass, private GrMesh::SendToGpuImpl {
+class GrMtlOpsRenderPass : public GrOpsRenderPass {
 public:
     GrMtlOpsRenderPass(GrMtlGpu* gpu, GrRenderTarget* rt, GrSurfaceOrigin origin,
                        const GrOpsRenderPass::LoadAndStoreInfo& colorInfo,
@@ -43,7 +43,16 @@
     void onSetScissorRect(const SkIRect&) override;
     bool onBindTextures(const GrPrimitiveProcessor&, const GrPipeline&,
                         const GrSurfaceProxy* const primProcTextures[]) override;
-    void onDrawMesh(GrPrimitiveType, const GrMesh&) override;
+    void onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override;
+    void onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                       GrPrimitiveRestart, uint16_t minIndexValue, uint16_t maxIndexValue,
+                       const GrBuffer* vertexBuffer, int baseVertex) override;
+    void onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance,
+                         const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override;
+    void onDrawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                                int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                                int baseVertex) override;
 
     void onClear(const GrFixedClip& clip, const SkPMColor4f& color) override;
 
@@ -55,18 +64,6 @@
     void bindGeometry(const GrBuffer* vertexBuffer, size_t vertexOffset,
                       const GrBuffer* instanceBuffer);
 
-    // GrMesh::SendToGpuImpl methods. These issue the actual Metal draw commands.
-    // Marked final as a hint to the compiler to not use virtual dispatch.
-    void sendArrayMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount, int baseVertex) final;
-    void sendIndexedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount, int baseIndex,
-                              uint16_t /*minIndexValue*/, uint16_t /*maxIndexValue*/,
-                              int baseVertex) final;
-    void sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount, int baseVertex,
-                                int instanceCount, int baseInstance) final;
-    void sendIndexedInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount,
-                                       int baseIndex, int baseVertex, int instanceCount,
-                                       int baseInstance) final;
-
     void setVertexBuffer(id<MTLRenderCommandEncoder>, const GrMtlBuffer*, size_t offset,
                          size_t index);
     void resetBufferBindings();
@@ -76,6 +73,7 @@
 
     id<MTLRenderCommandEncoder> fActiveRenderCmdEncoder;
     GrMtlPipelineState*         fActivePipelineState = nullptr;
+    MTLPrimitiveType            fActivePrimitiveType;
     MTLRenderPassDescriptor*    fRenderPassDesc;
     SkRect                      fBounds;
     size_t                      fCurrentVertexStride;
diff --git a/src/gpu/mtl/GrMtlOpsRenderPass.mm b/src/gpu/mtl/GrMtlOpsRenderPass.mm
index 8c902d5..7729c16 100644
--- a/src/gpu/mtl/GrMtlOpsRenderPass.mm
+++ b/src/gpu/mtl/GrMtlOpsRenderPass.mm
@@ -49,6 +49,24 @@
     fActiveRenderCmdEncoder = nil;
 }
 
+static MTLPrimitiveType gr_to_mtl_primitive(GrPrimitiveType primitiveType) {
+    const static MTLPrimitiveType mtlPrimitiveType[] {
+        MTLPrimitiveTypeTriangle,
+        MTLPrimitiveTypeTriangleStrip,
+        MTLPrimitiveTypePoint,
+        MTLPrimitiveTypeLine,
+        MTLPrimitiveTypeLineStrip
+    };
+    static_assert((int)GrPrimitiveType::kTriangles == 0);
+    static_assert((int)GrPrimitiveType::kTriangleStrip == 1);
+    static_assert((int)GrPrimitiveType::kPoints == 2);
+    static_assert((int)GrPrimitiveType::kLines == 3);
+    static_assert((int)GrPrimitiveType::kLineStrip == 4);
+
+    SkASSERT(primitiveType <= GrPrimitiveType::kLineStrip);
+    return mtlPrimitiveType[static_cast<int>(primitiveType)];
+}
+
 bool GrMtlOpsRenderPass::onBindPipeline(const GrProgramInfo& programInfo,
                                         const SkRect& drawBounds) {
     fActivePipelineState = fGpu->resourceProvider().findOrCreateCompatiblePipelineState(
@@ -83,6 +101,7 @@
                                                                        fRenderTarget->height()));
     }
 
+    fActivePrimitiveType = gr_to_mtl_primitive(programInfo.primitiveType());
     fBounds.join(drawBounds);
     return true;
 }
@@ -104,12 +123,6 @@
     return true;
 }
 
-void GrMtlOpsRenderPass::onDrawMesh(GrPrimitiveType primitiveType, const GrMesh& mesh) {
-    SkASSERT(fActivePipelineState);
-    SkASSERT(nil != fActiveRenderCmdEncoder);
-    mesh.sendToGpu(primitiveType, this);
-}
-
 void GrMtlOpsRenderPass::onClear(const GrFixedClip& clip, const SkPMColor4f& color) {
     // We should never end up here since all clears should either be done as draws or load ops in
     // metal. If we hit this assert then we missed a chance to set a load op on the
@@ -233,24 +246,6 @@
     fActiveRenderCmdEncoder = nil;
 }
 
-static MTLPrimitiveType gr_to_mtl_primitive(GrPrimitiveType primitiveType) {
-    const static MTLPrimitiveType mtlPrimitiveType[] {
-        MTLPrimitiveTypeTriangle,
-        MTLPrimitiveTypeTriangleStrip,
-        MTLPrimitiveTypePoint,
-        MTLPrimitiveTypeLine,
-        MTLPrimitiveTypeLineStrip
-    };
-    static_assert((int)GrPrimitiveType::kTriangles == 0);
-    static_assert((int)GrPrimitiveType::kTriangleStrip == 1);
-    static_assert((int)GrPrimitiveType::kPoints == 2);
-    static_assert((int)GrPrimitiveType::kLines == 3);
-    static_assert((int)GrPrimitiveType::kLineStrip == 4);
-
-    SkASSERT(primitiveType <= GrPrimitiveType::kLineStrip);
-    return mtlPrimitiveType[static_cast<int>(primitiveType)];
-}
-
 void GrMtlOpsRenderPass::bindGeometry(const GrBuffer* vertexBuffer,
                                       size_t vertexOffset,
                                       const GrBuffer* instanceBuffer) {
@@ -271,34 +266,37 @@
     }
 }
 
-void GrMtlOpsRenderPass::sendArrayMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                            int vertexCount, int baseVertex) {
-    this->bindGeometry(mesh.vertexBuffer(), 0, nullptr);
+void GrMtlOpsRenderPass::onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) {
+    SkASSERT(fActivePipelineState);
+    SkASSERT(nil != fActiveRenderCmdEncoder);
+    this->bindGeometry(vertexBuffer, 0, nullptr);
 
-    [fActiveRenderCmdEncoder drawPrimitives:gr_to_mtl_primitive(primitiveType)
+    [fActiveRenderCmdEncoder drawPrimitives:fActivePrimitiveType
                                 vertexStart:baseVertex
                                 vertexCount:vertexCount];
 }
 
-void GrMtlOpsRenderPass::sendIndexedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                              int indexCount, int baseIndex,
-                                              uint16_t /*minIndexValue*/,
-                                              uint16_t /*maxIndexValue*/, int baseVertex) {
-    this->bindGeometry(mesh.vertexBuffer(), fCurrentVertexStride*baseVertex, nullptr);
+void GrMtlOpsRenderPass::onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                       GrPrimitiveRestart primitiveRestart, uint16_t minIndexValue,
+                                       uint16_t maxIndexValue, const GrBuffer* vertexBuffer,
+                                       int baseVertex) {
+    SkASSERT(fActivePipelineState);
+    SkASSERT(nil != fActiveRenderCmdEncoder);
+    this->bindGeometry(vertexBuffer, fCurrentVertexStride*baseVertex, nullptr);
 
     id<MTLBuffer> mtlIndexBuffer = nil;
-    if (mesh.indexBuffer()) {
-        SkASSERT(!mesh.indexBuffer()->isCpuBuffer());
-        SkASSERT(!static_cast<const GrGpuBuffer*>(mesh.indexBuffer())->isMapped());
+    if (indexBuffer) {
+        SkASSERT(!indexBuffer->isCpuBuffer());
+        SkASSERT(!static_cast<const GrGpuBuffer*>(indexBuffer)->isMapped());
 
-        mtlIndexBuffer = static_cast<const GrMtlBuffer*>(mesh.indexBuffer())->mtlBuffer();
+        mtlIndexBuffer = static_cast<const GrMtlBuffer*>(indexBuffer)->mtlBuffer();
         SkASSERT(mtlIndexBuffer);
     }
 
-    SkASSERT(mesh.primitiveRestart() == GrPrimitiveRestart::kNo);
-    size_t indexOffset = static_cast<const GrMtlBuffer*>(mesh.indexBuffer())->offset() +
+    SkASSERT(primitiveRestart == GrPrimitiveRestart::kNo);
+    size_t indexOffset = static_cast<const GrMtlBuffer*>(indexBuffer)->offset() +
                          sizeof(uint16_t) * baseIndex;
-    [fActiveRenderCmdEncoder drawIndexedPrimitives:gr_to_mtl_primitive(primitiveType)
+    [fActiveRenderCmdEncoder drawIndexedPrimitives:fActivePrimitiveType
                                         indexCount:indexCount
                                          indexType:MTLIndexTypeUInt16
                                        indexBuffer:mtlIndexBuffer
@@ -306,13 +304,15 @@
     fGpu->stats()->incNumDraws();
 }
 
-void GrMtlOpsRenderPass::sendInstancedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh,
-                                                int vertexCount, int baseVertex, int instanceCount,
-                                                int baseInstance) {
-    this->bindGeometry(mesh.vertexBuffer(), 0, mesh.instanceBuffer());
+void GrMtlOpsRenderPass::onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount,
+                                         int baseInstance, const GrBuffer* vertexBuffer,
+                                         int vertexCount, int baseVertex) {
+    SkASSERT(fActivePipelineState);
+    SkASSERT(nil != fActiveRenderCmdEncoder);
+    this->bindGeometry(vertexBuffer, 0, instanceBuffer);
 
     if (@available(macOS 10.11, iOS 9.0, *)) {
-        [fActiveRenderCmdEncoder drawPrimitives:gr_to_mtl_primitive(primitiveType)
+        [fActiveRenderCmdEncoder drawPrimitives:fActivePrimitiveType
                                     vertexStart:baseVertex
                                     vertexCount:vertexCount
                                   instanceCount:instanceCount
@@ -322,27 +322,29 @@
     }
 }
 
-void GrMtlOpsRenderPass::sendIndexedInstancedMeshToGpu(GrPrimitiveType primitiveType,
-                                                       const GrMesh& mesh, int indexCount,
-                                                       int baseIndex, int baseVertex,
-                                                       int instanceCount, int baseInstance) {
-    this->bindGeometry(mesh.vertexBuffer(), 0, mesh.instanceBuffer());
+void GrMtlOpsRenderPass::onDrawIndexedInstanced(
+        const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+        GrPrimitiveRestart primitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+        int baseInstance, const GrBuffer* vertexBuffer, int baseVertex) {
+    SkASSERT(fActivePipelineState);
+    SkASSERT(nil != fActiveRenderCmdEncoder);
+    this->bindGeometry(vertexBuffer, 0, instanceBuffer);
 
     id<MTLBuffer> mtlIndexBuffer = nil;
-    if (mesh.indexBuffer()) {
-        SkASSERT(!mesh.indexBuffer()->isCpuBuffer());
-        SkASSERT(!static_cast<const GrGpuBuffer*>(mesh.indexBuffer())->isMapped());
+    if (indexBuffer) {
+        SkASSERT(!indexBuffer->isCpuBuffer());
+        SkASSERT(!static_cast<const GrGpuBuffer*>(indexBuffer)->isMapped());
 
-        mtlIndexBuffer = static_cast<const GrMtlBuffer*>(mesh.indexBuffer())->mtlBuffer();
+        mtlIndexBuffer = static_cast<const GrMtlBuffer*>(indexBuffer)->mtlBuffer();
         SkASSERT(mtlIndexBuffer);
     }
 
-    SkASSERT(mesh.primitiveRestart() == GrPrimitiveRestart::kNo);
-    size_t indexOffset = static_cast<const GrMtlBuffer*>(mesh.indexBuffer())->offset() +
+    SkASSERT(primitiveRestart == GrPrimitiveRestart::kNo);
+    size_t indexOffset = static_cast<const GrMtlBuffer*>(indexBuffer)->offset() +
                          sizeof(uint16_t) * baseIndex;
 
     if (@available(macOS 10.11, iOS 9.0, *)) {
-        [fActiveRenderCmdEncoder drawIndexedPrimitives:gr_to_mtl_primitive(primitiveType)
+        [fActiveRenderCmdEncoder drawIndexedPrimitives:fActivePrimitiveType
                                             indexCount:indexCount
                                              indexType:MTLIndexTypeUInt16
                                            indexBuffer:mtlIndexBuffer
diff --git a/src/gpu/ops/GrFillRRectOp.cpp b/src/gpu/ops/GrFillRRectOp.cpp
index 7941f31..d8bfa34 100644
--- a/src/gpu/ops/GrFillRRectOp.cpp
+++ b/src/gpu/ops/GrFillRRectOp.cpp
@@ -19,15 +19,120 @@
 #include "src/gpu/glsl/GrGLSLGeometryProcessor.h"
 #include "src/gpu/glsl/GrGLSLVarying.h"
 #include "src/gpu/glsl/GrGLSLVertexGeoBuilder.h"
+#include "src/gpu/ops/GrDrawOp.h"
+
+namespace {
+
+class FillRRectOp : public GrDrawOp {
+public:
+    DEFINE_OP_CLASS_ID
+
+    static std::unique_ptr<GrDrawOp> Make(GrRecordingContext*,
+                                          GrAAType,
+                                          const SkMatrix& viewMatrix,
+                                          const SkRRect&,
+                                          const GrCaps&,
+                                          GrPaint&&);
+
+    const char* name() const final { return "GrFillRRectOp"; }
+
+    FixedFunctionFlags fixedFunctionFlags() const final {
+        return (GrAAType::kMSAA == fAAType) ? FixedFunctionFlags::kUsesHWAA
+                                            : FixedFunctionFlags::kNone;
+    }
+    GrProcessorSet::Analysis finalize(const GrCaps&, const GrAppliedClip*,
+                                      bool hasMixedSampledCoverage, GrClampType) final;
+    CombineResult onCombineIfPossible(GrOp*, GrRecordingContext::Arenas*, const GrCaps&) final;
+    void visitProxies(const VisitProxyFunc& fn) const override {
+        if (fProgramInfo) {
+            fProgramInfo->visitProxies(fn);
+        } else {
+            fProcessors.visitProxies(fn);
+        }
+    }
+
+    void onPrePrepare(GrRecordingContext*, const GrSurfaceProxyView*, GrAppliedClip*,
+                      const GrXferProcessor::DstProxyView&) final;
+
+    void onPrepare(GrOpFlushState*) final;
+
+    void onExecute(GrOpFlushState*, const SkRect& chainBounds) final;
+
+private:
+    enum class Flags {
+        kNone             = 0,
+        kUseHWDerivatives = 1 << 0,
+        kHasPerspective   = 1 << 1,
+        kHasLocalCoords   = 1 << 2,
+        kWideColor        = 1 << 3
+    };
+
+    GR_DECL_BITFIELD_CLASS_OPS_FRIENDS(Flags);
+
+    class Processor;
+
+    FillRRectOp(GrAAType, const SkRRect&, Flags, const SkMatrix& totalShapeMatrix,
+                GrPaint&&, const SkRect& devBounds);
+
+    // These methods are used to append data of various POD types to our internal array of instance
+    // data. The actual layout of the instance buffer can vary from Op to Op.
+    template <typename T> inline T* appendInstanceData(int count) {
+        static_assert(std::is_pod<T>::value, "");
+        static_assert(4 == alignof(T), "");
+        return reinterpret_cast<T*>(fInstanceData.push_back_n(sizeof(T) * count));
+    }
+
+    template <typename T, typename... Args>
+    inline void writeInstanceData(const T& val, const Args&... remainder) {
+        memcpy(this->appendInstanceData<T>(1), &val, sizeof(T));
+        this->writeInstanceData(remainder...);
+    }
+
+    void writeInstanceData() {}  // Halt condition.
+
+    // Create a GrProgramInfo object in the provided arena.
+    GrProgramInfo* createProgramInfo(const GrCaps*,
+                                     SkArenaAlloc*,
+                                     const GrSurfaceProxyView* dstView,
+                                     GrAppliedClip&&,
+                                     const GrXferProcessor::DstProxyView&);
+
+    const GrAAType fAAType;
+    const SkPMColor4f fOriginalColor;
+    const SkRect fLocalRect;
+    Flags fFlags;
+    GrProcessorSet fProcessors;
+
+    SkSTArray<sizeof(float) * 16 * 4, char, /*MEM_MOVE=*/ true> fInstanceData;
+    int fInstanceCount = 1;
+    int fInstanceStride = 0;
+
+    sk_sp<const GrBuffer> fInstanceBuffer;
+    sk_sp<const GrBuffer> fVertexBuffer;
+    sk_sp<const GrBuffer> fIndexBuffer;
+    int fBaseInstance = 0;
+    int fIndexCount = 0;
+
+    // If this op is prePrepared the created programInfo will be stored here for use in
+    // onExecute. In the prePrepared case it will have been stored in the record-time arena.
+    GrProgramInfo* fProgramInfo = nullptr;
+
+    friend class ::GrOpMemoryPool;
+};
+
+GR_MAKE_BITFIELD_CLASS_OPS(FillRRectOp::Flags)
 
 // Hardware derivatives are not always accurate enough for highly elliptical corners. This method
 // checks to make sure the corners will still all look good if we use HW derivatives.
 static bool can_use_hw_derivatives_with_coverage(
         const GrShaderCaps&, const SkMatrix&, const SkRRect&);
 
-std::unique_ptr<GrFillRRectOp> GrFillRRectOp::Make(
-        GrRecordingContext* ctx, GrAAType aaType, const SkMatrix& viewMatrix, const SkRRect& rrect,
-        const GrCaps& caps, GrPaint&& paint) {
+std::unique_ptr<GrDrawOp> FillRRectOp::Make(GrRecordingContext* ctx,
+                                            GrAAType aaType,
+                                            const SkMatrix& viewMatrix,
+                                            const SkRRect& rrect,
+                                            const GrCaps& caps,
+                                            GrPaint&& paint) {
     if (!caps.instanceAttribSupport()) {
         return nullptr;
     }
@@ -91,17 +196,17 @@
     }
 
     GrOpMemoryPool* pool = ctx->priv().opMemoryPool();
-    return pool->allocate<GrFillRRectOp>(aaType, rrect, flags, m, std::move(paint), devBounds);
+    return pool->allocate<FillRRectOp>(aaType, rrect, flags, m, std::move(paint), devBounds);
 }
 
-GrFillRRectOp::GrFillRRectOp(GrAAType aaType, const SkRRect& rrect, Flags flags,
-                             const SkMatrix& totalShapeMatrix, GrPaint&& paint,
-                             const SkRect& devBounds)
+FillRRectOp::FillRRectOp(GrAAType aaType, const SkRRect& rrect, Flags flags,
+                         const SkMatrix& totalShapeMatrix, GrPaint&& paint,
+                         const SkRect& devBounds)
         : GrDrawOp(ClassID())
         , fAAType(aaType)
         , fOriginalColor(paint.getColor4f())
         , fLocalRect(rrect.rect())
-        , fFlags(flags)
+        , fFlags(flags & ~(Flags::kHasLocalCoords | Flags::kWideColor))
         , fProcessors(std::move(paint)) {
     SkASSERT((fFlags & Flags::kHasPerspective) == totalShapeMatrix.hasPerspective());
     this->setBounds(devBounds, GrOp::HasAABloat::kYes, GrOp::IsHairline::kNo);
@@ -128,7 +233,7 @@
     // We will write the color and local rect attribs during finalize().
 }
 
-GrProcessorSet::Analysis GrFillRRectOp::finalize(
+GrProcessorSet::Analysis FillRRectOp::finalize(
         const GrCaps& caps, const GrAppliedClip* clip, bool hasMixedSampledCoverage,
         GrClampType clampType) {
     SkASSERT(1 == fInstanceCount);
@@ -157,9 +262,10 @@
     return analysis;
 }
 
-GrDrawOp::CombineResult GrFillRRectOp::onCombineIfPossible(GrOp* op, GrRecordingContext::Arenas*,
-                                                           const GrCaps&) {
-    const auto& that = *op->cast<GrFillRRectOp>();
+GrDrawOp::CombineResult FillRRectOp::onCombineIfPossible(GrOp* op,
+                                                         GrRecordingContext::Arenas*,
+                                                         const GrCaps&) {
+    const auto& that = *op->cast<FillRRectOp>();
     if (fFlags != that.fFlags || fProcessors != that.fProcessors ||
         fInstanceData.count() > std::numeric_limits<int>::max() - that.fInstanceData.count()) {
         return CombineResult::kCannotCombine;
@@ -171,7 +277,7 @@
     return CombineResult::kMerged;
 }
 
-class GrFillRRectOp::Processor : public GrGeometryProcessor {
+class FillRRectOp::Processor : public GrGeometryProcessor {
 public:
     static GrGeometryProcessor* Make(SkArenaAlloc* arena, GrAAType aaType, Flags flags) {
         return arena->make<Processor>(aaType, flags);
@@ -239,7 +345,7 @@
     typedef GrGeometryProcessor INHERITED;
 };
 
-constexpr GrPrimitiveProcessor::Attribute GrFillRRectOp::Processor::kVertexAttribs[];
+constexpr GrPrimitiveProcessor::Attribute FillRRectOp::Processor::kVertexAttribs[];
 
 // Our coverage geometry consists of an inset octagon with solid coverage, surrounded by linear
 // coverage ramps on the horizontal and vertical edges, and "arc coverage" pieces on the diagonal
@@ -448,10 +554,10 @@
 
 GR_DECLARE_STATIC_UNIQUE_KEY(gMSAAIndexBufferKey);
 
-void GrFillRRectOp::onPrePrepare(GrRecordingContext* context,
-                                 const GrSurfaceProxyView* dstView,
-                                 GrAppliedClip* clip,
-                                 const GrXferProcessor::DstProxyView& dstProxyView) {
+void FillRRectOp::onPrePrepare(GrRecordingContext* context,
+                               const GrSurfaceProxyView* dstView,
+                               GrAppliedClip* clip,
+                               const GrXferProcessor::DstProxyView& dstProxyView) {
     SkArenaAlloc* arena = context->priv().recordTimeAllocator();
 
     // This is equivalent to a GrOpFlushState::detachAppliedClip
@@ -467,7 +573,7 @@
     context->priv().recordProgramInfo(fProgramInfo);
 }
 
-void GrFillRRectOp::onPrepare(GrOpFlushState* flushState) {
+void FillRRectOp::onPrepare(GrOpFlushState* flushState) {
     if (void* instanceData = flushState->makeVertexSpace(fInstanceStride, fInstanceCount,
                                                          &fInstanceBuffer, &fBaseInstance)) {
         SkASSERT(fInstanceStride * fInstanceCount == fInstanceData.count());
@@ -505,7 +611,7 @@
     }
 }
 
-class GrFillRRectOp::Processor::CoverageImpl : public GrGLSLGeometryProcessor {
+class FillRRectOp::Processor::CoverageImpl : public GrGLSLGeometryProcessor {
     void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
         const auto& proc = args.fGP.cast<Processor>();
         bool useHWDerivatives = (proc.fFlags & Flags::kUseHWDerivatives);
@@ -642,7 +748,7 @@
 };
 
 
-class GrFillRRectOp::Processor::MSAAImpl : public GrGLSLGeometryProcessor {
+class FillRRectOp::Processor::MSAAImpl : public GrGLSLGeometryProcessor {
     void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
         const auto& proc = args.fGP.cast<Processor>();
         bool useHWDerivatives = (proc.fFlags & Flags::kUseHWDerivatives);
@@ -747,7 +853,7 @@
     }
 };
 
-GrGLSLPrimitiveProcessor* GrFillRRectOp::Processor::createGLSLInstance(
+GrGLSLPrimitiveProcessor* FillRRectOp::Processor::createGLSLInstance(
         const GrShaderCaps&) const {
     if (GrAAType::kCoverage != fAAType) {
         return new MSAAImpl();
@@ -755,11 +861,11 @@
     return new CoverageImpl();
 }
 
-GrProgramInfo* GrFillRRectOp::createProgramInfo(const GrCaps* caps,
-                                                SkArenaAlloc* arena,
-                                                const GrSurfaceProxyView* dstView,
-                                                GrAppliedClip&& appliedClip,
-                                                const GrXferProcessor::DstProxyView& dstProxyView) {
+GrProgramInfo* FillRRectOp::createProgramInfo(const GrCaps* caps,
+                                              SkArenaAlloc* arena,
+                                              const GrSurfaceProxyView* dstView,
+                                              GrAppliedClip&& appliedClip,
+                                              const GrXferProcessor::DstProxyView& dstProxyView) {
     GrGeometryProcessor* geomProc = Processor::Make(arena, fAAType, fFlags);
     SkASSERT(geomProc->instanceStride() == (size_t)fInstanceStride);
 
@@ -794,7 +900,7 @@
                                       GrPrimitiveType::kTriangles);
 }
 
-void GrFillRRectOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
+void FillRRectOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
     if (!fInstanceBuffer || !fIndexBuffer || !fVertexBuffer) {
         return;  // Setup failed.
     }
@@ -876,3 +982,46 @@
     }
     SK_ABORT("Invalid round rect type.");
 }
+
+} // anonymous namespace
+
+
+std::unique_ptr<GrDrawOp> GrFillRRectOp::Make(GrRecordingContext* ctx,
+                                              GrAAType aaType,
+                                              const SkMatrix& viewMatrix,
+                                              const SkRRect& rrect,
+                                              const GrCaps& caps,
+                                              GrPaint&& paint) {
+    return FillRRectOp::Make(ctx, aaType, viewMatrix, rrect, caps, std::move(paint));
+}
+
+#if GR_TEST_UTILS
+
+#include "src/gpu/GrDrawOpTest.h"
+
+GR_DRAW_OP_TEST_DEFINE(FillRRectOp) {
+    const GrCaps* caps = context->priv().caps();
+
+    SkMatrix viewMatrix = GrTest::TestMatrix(random);
+    GrAAType aaType = GrAAType::kNone;
+    if (random->nextBool()) {
+        aaType = (numSamples > 1) ? GrAAType::kMSAA : GrAAType::kCoverage;
+    }
+
+    SkRect rect = GrTest::TestRect(random);
+    float w = rect.width();
+    float h = rect.height();
+
+    SkRRect rrect;
+    // TODO: test out other rrect configurations
+    rrect.setNinePatch(rect, w / 3.0f, h / 4.0f, w / 5.0f, h / 6.0);
+
+    return GrFillRRectOp::Make(context,
+                               aaType,
+                               viewMatrix,
+                               rrect,
+                               *caps,
+                               std::move(paint));
+}
+
+#endif
diff --git a/src/gpu/ops/GrFillRRectOp.h b/src/gpu/ops/GrFillRRectOp.h
index 09bfa16..7a4b860 100644
--- a/src/gpu/ops/GrFillRRectOp.h
+++ b/src/gpu/ops/GrFillRRectOp.h
@@ -8,105 +8,22 @@
 #ifndef GrFillRRectOp_DEFINED
 #define GrFillRRectOp_DEFINED
 
-#include "src/gpu/GrProgramInfo.h"
-#include "src/gpu/ops/GrDrawOp.h"
+#include "include/private/GrTypesPriv.h"
 
+class GrCaps;
+class GrDrawOp;
+class GrPaint;
 class GrRecordingContext;
+class SkMatrix;
+class SkRRect;
 
-class GrFillRRectOp : public GrDrawOp {
-public:
-    DEFINE_OP_CLASS_ID
-
-    static std::unique_ptr<GrFillRRectOp> Make(
-            GrRecordingContext*, GrAAType, const SkMatrix& viewMatrix, const SkRRect&,
-            const GrCaps&, GrPaint&&);
-
-    const char* name() const final { return "GrFillRRectOp"; }
-
-    FixedFunctionFlags fixedFunctionFlags() const final {
-        return (GrAAType::kMSAA == fAAType) ? FixedFunctionFlags::kUsesHWAA
-                                            : FixedFunctionFlags::kNone;
-    }
-    GrProcessorSet::Analysis finalize(const GrCaps&, const GrAppliedClip*,
-                                      bool hasMixedSampledCoverage, GrClampType) final;
-    CombineResult onCombineIfPossible(GrOp*, GrRecordingContext::Arenas*, const GrCaps&) final;
-    void visitProxies(const VisitProxyFunc& fn) const override {
-        if (fProgramInfo) {
-            fProgramInfo->visitProxies(fn);
-        } else {
-            fProcessors.visitProxies(fn);
-        }
-    }
-
-    void onPrePrepare(GrRecordingContext*, const GrSurfaceProxyView*, GrAppliedClip*,
-                      const GrXferProcessor::DstProxyView&) final;
-
-    void onPrepare(GrOpFlushState*) final;
-
-    void onExecute(GrOpFlushState*, const SkRect& chainBounds) final;
-
-private:
-    enum class Flags {
-        kNone = 0,
-        kUseHWDerivatives = 1 << 0,
-        kHasPerspective = 1 << 1,
-        kHasLocalCoords = 1 << 2,
-        kWideColor = 1 << 3
-    };
-
-    GR_DECL_BITFIELD_CLASS_OPS_FRIENDS(Flags);
-
-    class Processor;
-
-    GrFillRRectOp(GrAAType, const SkRRect&, Flags, const SkMatrix& totalShapeMatrix,
-                  GrPaint&&, const SkRect& devBounds);
-
-    // These methods are used to append data of various POD types to our internal array of instance
-    // data. The actual layout of the instance buffer can vary from Op to Op.
-    template <typename T> inline T* appendInstanceData(int count) {
-        static_assert(std::is_pod<T>::value, "");
-        static_assert(4 == alignof(T), "");
-        return reinterpret_cast<T*>(fInstanceData.push_back_n(sizeof(T) * count));
-    }
-
-    template <typename T, typename... Args>
-    inline void writeInstanceData(const T& val, const Args&... remainder) {
-        memcpy(this->appendInstanceData<T>(1), &val, sizeof(T));
-        this->writeInstanceData(remainder...);
-    }
-
-    void writeInstanceData() {}  // Halt condition.
-
-    // Create a GrProgramInfo object in the provided arena
-    GrProgramInfo* createProgramInfo(const GrCaps*,
-                                     SkArenaAlloc*,
-                                     const GrSurfaceProxyView* dstView,
-                                     GrAppliedClip&&,
-                                     const GrXferProcessor::DstProxyView&);
-
-    const GrAAType fAAType;
-    const SkPMColor4f fOriginalColor;
-    const SkRect fLocalRect;
-    Flags fFlags;
-    GrProcessorSet fProcessors;
-
-    SkSTArray<sizeof(float) * 16 * 4, char, /*MEM_MOVE=*/ true> fInstanceData;
-    int fInstanceCount = 1;
-    int fInstanceStride = 0;
-
-    sk_sp<const GrBuffer> fInstanceBuffer;
-    sk_sp<const GrBuffer> fVertexBuffer;
-    sk_sp<const GrBuffer> fIndexBuffer;
-    int fBaseInstance = 0;
-    int fIndexCount = 0;
-
-    // If this op is prePrepared the created programInfo will be stored here from use in
-    // onExecute. In the prePrepared case it will have been stored in the record-time arena.
-    GrProgramInfo* fProgramInfo = nullptr;
-
-    friend class GrOpMemoryPool;
+namespace GrFillRRectOp {
+    std::unique_ptr<GrDrawOp> Make(GrRecordingContext*,
+                                   GrAAType,
+                                   const SkMatrix& viewMatrix,
+                                   const SkRRect&,
+                                   const GrCaps&,
+                                   GrPaint&&);
 };
 
-GR_MAKE_BITFIELD_CLASS_OPS(GrFillRRectOp::Flags)
-
 #endif
diff --git a/src/gpu/vk/GrVkOpsRenderPass.cpp b/src/gpu/vk/GrVkOpsRenderPass.cpp
index 92319a5..695e2c2 100644
--- a/src/gpu/vk/GrVkOpsRenderPass.cpp
+++ b/src/gpu/vk/GrVkOpsRenderPass.cpp
@@ -586,43 +586,45 @@
                                                      this->currentCommandBuffer());
 }
 
-void GrVkOpsRenderPass::onDrawMesh(GrPrimitiveType primitiveType, const GrMesh& mesh) {
+void GrVkOpsRenderPass::onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount,
+                                        int baseInstance, const GrBuffer* vertexBuffer,
+                                        int vertexCount, int baseVertex) {
     if (!fCurrentRenderPass) {
         SkASSERT(fGpu->isDeviceLost());
         return;
     }
-
     SkASSERT(fCurrentPipelineState);
-    mesh.sendToGpu(primitiveType, this);
-    fCurrentCBIsEmpty = false;
-}
-
-void GrVkOpsRenderPass::sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh& mesh, int vertexCount,
-                                               int baseVertex, int instanceCount,
-                                               int baseInstance) {
-    SkASSERT(!mesh.vertexBuffer() || !mesh.vertexBuffer()->isCpuBuffer());
-    SkASSERT(!mesh.instanceBuffer() || !mesh.instanceBuffer()->isCpuBuffer());
-    auto gpuVertexBuffer = static_cast<const GrGpuBuffer*>(mesh.vertexBuffer());
-    auto gpuInstanceBuffer = static_cast<const GrGpuBuffer*>(mesh.instanceBuffer());
+    SkASSERT(!vertexBuffer || !vertexBuffer->isCpuBuffer());
+    SkASSERT(!instanceBuffer || !instanceBuffer->isCpuBuffer());
+    auto gpuVertexBuffer = static_cast<const GrGpuBuffer*>(vertexBuffer);
+    auto gpuInstanceBuffer = static_cast<const GrGpuBuffer*>(instanceBuffer);
     this->bindGeometry(nullptr, gpuVertexBuffer, gpuInstanceBuffer);
     this->currentCommandBuffer()->draw(fGpu, vertexCount, instanceCount, baseVertex, baseInstance);
     fGpu->stats()->incNumDraws();
+    fCurrentCBIsEmpty = false;
 }
 
-void GrVkOpsRenderPass::sendIndexedInstancedMeshToGpu(GrPrimitiveType, const GrMesh& mesh,
-                                                      int indexCount, int baseIndex, int baseVertex,
-                                                      int instanceCount, int baseInstance) {
-    SkASSERT(mesh.primitiveRestart() == GrPrimitiveRestart::kNo);
-    SkASSERT(!mesh.vertexBuffer() || !mesh.vertexBuffer()->isCpuBuffer());
-    SkASSERT(!mesh.instanceBuffer() || !mesh.instanceBuffer()->isCpuBuffer());
-    SkASSERT(!mesh.indexBuffer()->isCpuBuffer());
-    auto gpuIndexxBuffer = static_cast<const GrGpuBuffer*>(mesh.indexBuffer());
-    auto gpuVertexBuffer = static_cast<const GrGpuBuffer*>(mesh.vertexBuffer());
-    auto gpuInstanceBuffer = static_cast<const GrGpuBuffer*>(mesh.instanceBuffer());
+void GrVkOpsRenderPass::onDrawIndexedInstanced(
+        const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+        GrPrimitiveRestart primitiveRestart, const GrBuffer* instanceBuffer, int instanceCount,
+        int baseInstance, const GrBuffer* vertexBuffer, int baseVertex) {
+    if (!fCurrentRenderPass) {
+        SkASSERT(fGpu->isDeviceLost());
+        return;
+    }
+    SkASSERT(fCurrentPipelineState);
+    SkASSERT(primitiveRestart == GrPrimitiveRestart::kNo);
+    SkASSERT(!vertexBuffer || !vertexBuffer->isCpuBuffer());
+    SkASSERT(!instanceBuffer || !instanceBuffer->isCpuBuffer());
+    SkASSERT(!indexBuffer->isCpuBuffer());
+    auto gpuIndexxBuffer = static_cast<const GrGpuBuffer*>(indexBuffer);
+    auto gpuVertexBuffer = static_cast<const GrGpuBuffer*>(vertexBuffer);
+    auto gpuInstanceBuffer = static_cast<const GrGpuBuffer*>(instanceBuffer);
     this->bindGeometry(gpuIndexxBuffer, gpuVertexBuffer, gpuInstanceBuffer);
     this->currentCommandBuffer()->drawIndexed(fGpu, indexCount, instanceCount,
                                               baseIndex, baseVertex, baseInstance);
     fGpu->stats()->incNumDraws();
+    fCurrentCBIsEmpty = false;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/vk/GrVkOpsRenderPass.h b/src/gpu/vk/GrVkOpsRenderPass.h
index 96bbe56..7ecdbe8 100644
--- a/src/gpu/vk/GrVkOpsRenderPass.h
+++ b/src/gpu/vk/GrVkOpsRenderPass.h
@@ -23,7 +23,7 @@
 class GrVkRenderTarget;
 class GrVkSecondaryCommandBuffer;
 
-class GrVkOpsRenderPass : public GrOpsRenderPass, private GrMesh::SendToGpuImpl {
+class GrVkOpsRenderPass : public GrOpsRenderPass {
 public:
     GrVkOpsRenderPass(GrVkGpu*);
 
@@ -72,27 +72,22 @@
     void onSetScissorRect(const SkIRect&) override;
     bool onBindTextures(const GrPrimitiveProcessor&, const GrPipeline&,
                         const GrSurfaceProxy* const primProcTextures[]) override;
-    void onDrawMesh(GrPrimitiveType, const GrMesh&) override;
-
-    // GrMesh::SendToGpuImpl methods. These issue the actual Vulkan draw commands.
-    // Marked final as a hint to the compiler to not use virtual dispatch.
-    void sendArrayMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh, int vertexCount,
-                            int baseVertex) final {
-        SkASSERT(!mesh.instanceBuffer());
-        this->sendInstancedMeshToGpu(primitiveType, mesh, vertexCount, baseVertex, 1, 0);
+    void onDraw(const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override {
+        this->onDrawInstanced(nullptr, 1, 0, vertexBuffer, vertexCount, baseVertex);
     }
-    void sendIndexedMeshToGpu(GrPrimitiveType primitiveType, const GrMesh& mesh, int indexCount,
-                              int baseIndex, uint16_t minIndexValue, uint16_t maxIndexValue,
-                              int baseVertex) final {
-        SkASSERT(!mesh.instanceBuffer());
-        this->sendIndexedInstancedMeshToGpu(primitiveType, mesh, indexCount, baseIndex, baseVertex,
-                                            1, 0);
+    void onDrawIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                       GrPrimitiveRestart primitiveRestart, uint16_t minIndexValue,
+                       uint16_t maxIndexValue, const GrBuffer* vertexBuffer,
+                       int baseVertex) override {
+        this->onDrawIndexedInstanced(indexBuffer, indexCount, baseIndex, primitiveRestart, nullptr,
+                                     1, 0, vertexBuffer, baseVertex);
     }
-    void sendInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int vertexCount, int baseVertex,
-                                int instanceCount, int baseInstance) final;
-    void sendIndexedInstancedMeshToGpu(GrPrimitiveType, const GrMesh&, int indexCount,
-                                       int baseIndex, int baseVertex, int instanceCount,
-                                       int baseInstance) final;
+    void onDrawInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance,
+                         const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) override;
+    void onDrawIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, int baseIndex,
+                                GrPrimitiveRestart, const GrBuffer* instanceBuffer,
+                                int instanceCount, int baseInstance, const GrBuffer* vertexBuffer,
+                                int baseVertex) override;
 
     void onClear(const GrFixedClip&, const SkPMColor4f& color) override;
 
diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h
index d5a38e2..2d7df2e 100644
--- a/src/sksl/SkSLByteCode.h
+++ b/src/sksl/SkSLByteCode.h
@@ -124,37 +124,37 @@
         // uint8_t argumentSize
         kCallExternal,
         // Register target, Register src1, Register src2
-        kCompareEQF,
+        V(kCompareEQF),
         // Register target, Register src1, Register src2
-        kCompareEQI,
+        V(kCompareEQI),
         // Register target, Register src1, Register src2
-        kCompareNEQF,
+        V(kCompareNEQF),
         // Register target, Register src1, Register src2
-        kCompareNEQI,
+        V(kCompareNEQI),
         // Register target, Register src1, Register src2
-        kCompareGTF,
+        V(kCompareGTF),
         // Register target, Register src1, Register src2
-        kCompareGTS,
+        V(kCompareGTS),
         // Register target, Register src1, Register src2
-        kCompareGTU,
+        V(kCompareGTU),
         // Register target, Register src1, Register src2
-        kCompareGTEQF,
+        V(kCompareGTEQF),
         // Register target, Register src1, Register src2
-        kCompareGTEQS,
+        V(kCompareGTEQS),
         // Register target, Register src1, Register src2
-        kCompareGTEQU,
+        V(kCompareGTEQU),
         // Register target, Register src1, Register src2
-        kCompareLTF,
+        V(kCompareLTF),
         // Register target, Register src1, Register src2
-        kCompareLTS,
+        V(kCompareLTS),
         // Register target, Register src1, Register src2
-        kCompareLTU,
+        V(kCompareLTU),
         // Register target, Register src1, Register src2
-        kCompareLTEQF,
+        V(kCompareLTEQF),
         // Register target, Register src1, Register src2
-        kCompareLTEQS,
+        V(kCompareLTEQS),
         // Register target, Register src1, Register src2
-        kCompareLTEQU,
+        V(kCompareLTEQU),
         // no parameters
         kContinue,
         // Register target, Register src
@@ -248,8 +248,8 @@
         kReturnValue,
         // Register target, Register src, uint8_t columns, uint8_t rows
         kScalarToMatrix,
-        // Register target, Register test, Register ifTrue, Register ifFalse
-        kSelect,
+        // Register target, Register ifFalse, Register ifTrue, Register test (To match GLSL mix)
+        V(kSelect),
         // Register target, Register src, uint8_t count
         kShiftLeft,
         // Register target, Register src, uint8_t count
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index 3a65d8c..b8cd2ac 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -14,14 +14,37 @@
     : INHERITED(program, errors, nullptr)
     , fOutput(output)
     , fIntrinsics {
-        // "Normal" intrinsics are all $genType f($genType), mapped to a single instruction
-        { "cos",     ByteCode::Instruction::kCos },
-        { "sin",     ByteCode::Instruction::kSin },
-        { "sqrt",    ByteCode::Instruction::kSqrt },
-        { "tan",     ByteCode::Instruction::kTan },
+        // "Normal" intrinsics are all $genType f($genType [, $genType...])
+        // and all map to a single instruction (possibly with a vector version)
+        { "cos",     { ByteCode::Instruction::kCos,    false } },
+        { "mix",     { ByteCode::Instruction::kSelect, true  } },
+        { "not",     { ByteCode::Instruction::kNot,    false } },
+        { "sin",     { ByteCode::Instruction::kSin,    false } },
+        { "sqrt",    { ByteCode::Instruction::kSqrt,   false } },
+        { "tan",     { ByteCode::Instruction::kTan,    false } },
+
+        { "lessThan",         { ByteCode::Instruction::kCompareLTF,
+                                ByteCode::Instruction::kCompareLTS,
+                                ByteCode::Instruction::kCompareLTU, true } },
+        { "lessThanEqual",    { ByteCode::Instruction::kCompareLTEQF,
+                                ByteCode::Instruction::kCompareLTEQS,
+                                ByteCode::Instruction::kCompareLTEQU, true } },
+        { "greaterThan",      { ByteCode::Instruction::kCompareGTF,
+                                ByteCode::Instruction::kCompareGTS,
+                                ByteCode::Instruction::kCompareGTU, true } },
+        { "greaterThanEqual", { ByteCode::Instruction::kCompareGTEQF,
+                                ByteCode::Instruction::kCompareGTEQS,
+                                ByteCode::Instruction::kCompareGTEQU, true } },
+        { "equal",            { ByteCode::Instruction::kCompareEQF,
+                                ByteCode::Instruction::kCompareEQI,
+                                ByteCode::Instruction::kCompareEQI, true } },
+        { "notEqual",         { ByteCode::Instruction::kCompareNEQF,
+                                ByteCode::Instruction::kCompareNEQI,
+                                ByteCode::Instruction::kCompareNEQI, true } },
 
         // Special intrinsics have other signatures, or non-standard code-gen
-        { "dot",     SpecialIntrinsic::kDot },
+        { "all",     SpecialIntrinsic::kAll },
+        { "any",     SpecialIntrinsic::kAny },
         { "inverse", SpecialIntrinsic::kInverse },
         { "print",   SpecialIntrinsic::kPrint },
     } {}
@@ -914,33 +937,22 @@
                                            ByteCode::Register result) {
     if (intrinsic.fIsSpecial) {
         switch (intrinsic.fValue.fSpecial) {
-            case SpecialIntrinsic::kDot: {
-                SkASSERT(c.fArguments.size() == 2);
+            case SpecialIntrinsic::kAll:
+            case SpecialIntrinsic::kAny: {
+                SkASSERT(c.fArguments.size() == 1);
                 int count = SlotCount(c.fArguments[0]->fType);
-                ByteCode::Register left = this->next(count);
-                this->writeExpression(*c.fArguments[0], left);
-                ByteCode::Register right = this->next(count);
-                this->writeExpression(*c.fArguments[1], right);
-                ByteCode::Register product = this->next(count);
-                this->writeTypedInstruction(c.fType,
-                                            ByteCode::Instruction::kMultiplyIN,
-                                            ByteCode::Instruction::kMultiplyIN,
-                                            ByteCode::Instruction::kMultiplyFN);
-                this->write((uint8_t) count);
-                this->write(product);
-                this->write(left);
-                this->write(right);
-                ByteCode::Register total = product;
+                SkASSERT(count > 1);
+                // Fold a bvec down to a single bool:
+                ByteCode::Register arg = this->next(count);
+                ByteCode::Instruction inst = intrinsic.fValue.fSpecial == SpecialIntrinsic::kAll
+                                                        ? ByteCode::Instruction::kAnd
+                                                        : ByteCode::Instruction::kOr;
+                this->writeExpression(*c.fArguments[0], arg);
                 for (int i = 1; i < count; ++i) {
-                    this->writeTypedInstruction(c.fType,
-                                                ByteCode::Instruction::kAddI,
-                                                ByteCode::Instruction::kAddI,
-                                                ByteCode::Instruction::kAddF);
-                    ByteCode::Register sum = i == count - 1 ? result : this->next(1);
-                    this->write(sum);
-                    this->write(total);
-                    this->write(product + i);
-                    total = sum;
+                    this->write(inst);
+                    this->write(result);
+                    this->write(i == 1 ? arg : result);
+                    this->write(arg + i);
                 }
                 break;
             }
@@ -970,7 +982,7 @@
             }
         }
     } else {
-        int count = SlotCount(c.fType);
+        uint8_t count = (uint8_t) SlotCount(c.fType);
         std::vector<ByteCode::Register> argRegs;
         for (const auto& expr : c.fArguments) {
             SkASSERT(SlotCount(expr->fType) == count);
@@ -978,21 +990,49 @@
             this->writeExpression(*expr, reg);
             argRegs.push_back(reg);
         }
-        for (int i = 0; i < count; ++i) {
-            this->write(intrinsic.fValue.fInstruction);
-            if (c.fType.fName != "void") {
-                this->write(result + i);
+
+        const auto& instructions = intrinsic.fValue.fInstructions;
+        const Type& opType = c.fArguments[0]->fType;
+
+        if (instructions.fUseVector) {
+            if (count == 1) {
+                this->writeTypedInstruction(opType,
+                                            instructions.fFloat,
+                                            instructions.fSigned,
+                                            instructions.fUnsigned);
+            } else {
+                this->writeTypedInstruction(opType,
+                                            VEC(instructions.fFloat),
+                                            VEC(instructions.fSigned),
+                                            VEC(instructions.fUnsigned));
+                this->write(count);
             }
+            this->write(result);
             for (ByteCode::Register arg : argRegs) {
-                this->write(arg + i);
+                this->write(arg);
+            }
+        } else {
+            // No vector version of the instruction exists. Emit the scalar instruction N times.
+            for (uint8_t i = 0; i < count; ++i) {
+                this->writeTypedInstruction(opType,
+                                            instructions.fFloat,
+                                            instructions.fSigned,
+                                            instructions.fUnsigned);
+                this->write(result + i);
+                for (ByteCode::Register arg : argRegs) {
+                    this->write(arg + i);
+                }
             }
         }
     }
 }
 
 void ByteCodeGenerator::writeFunctionCall(const FunctionCall& c, ByteCode::Register result) {
+    // 'mix' is present as both a "pure" intrinsic (fDefined == false), and an SkSL implementation
+    // in the pre-parsed include files (fDefined == true), depending on argument types. We only
+    // send calls to the former through the intrinsic path here.
     auto found = fIntrinsics.find(c.fFunction.fName);
-    if (found != fIntrinsics.end()) {
+    if (found != fIntrinsics.end() && !c.fFunction.fDefined) {
         return this->writeIntrinsicCall(c, found->second, result);
     }
     int argCount = c.fArguments.size();
@@ -1162,9 +1202,9 @@
     for (int i = 0; i < count; ++i) {
         this->write(ByteCode::Instruction::kSelect);
         this->write(result + i);
-        this->write(test);
-        this->write(ifTrue + i);
         this->write(ifFalse + i);
+        this->write(ifTrue + i);
+        this->write(test);
     }
 }
 
diff --git a/src/sksl/SkSLByteCodeGenerator.h b/src/sksl/SkSLByteCodeGenerator.h
index 9afe1a5..15ac882 100644
--- a/src/sksl/SkSLByteCodeGenerator.h
+++ b/src/sksl/SkSLByteCodeGenerator.h
@@ -61,15 +61,21 @@
 private:
     // Intrinsics which do not simply map to a single opcode
     enum class SpecialIntrinsic {
-        kDot,
+        kAll,
+        kAny,
         kInverse,
         kPrint,
     };
 
     struct Intrinsic {
-        Intrinsic(ByteCode::Instruction instruction)
+        Intrinsic(ByteCode::Instruction i, bool useVector)
             : fIsSpecial(false)
-            , fValue(instruction) {}
+            , fValue(i, i, i, useVector) {}
+
+        Intrinsic(ByteCode::Instruction f, ByteCode::Instruction s, ByteCode::Instruction u,
+                  bool useVector)
+            : fIsSpecial(false)
+            , fValue(f, s, u, useVector) {}
 
         Intrinsic(SpecialIntrinsic special)
             : fIsSpecial(true)
@@ -78,13 +84,21 @@
         bool fIsSpecial;
 
         union Value {
-            Value(ByteCode::Instruction instruction)
-                : fInstruction(instruction) {}
+            Value(ByteCode::Instruction f, ByteCode::Instruction s, ByteCode::Instruction u,
+                  bool useVector)
+                : fInstructions{ f, s, u, useVector } {}
 
             Value(SpecialIntrinsic special)
                 : fSpecial(special) {}
 
-            ByteCode::Instruction fInstruction;
+            struct {
+                ByteCode::Instruction fFloat;
+                ByteCode::Instruction fSigned;
+                ByteCode::Instruction fUnsigned;
+
+                bool fUseVector;
+            } fInstructions;
+
             SpecialIntrinsic fSpecial;
         } fValue;
     };
diff --git a/src/sksl/SkSLCompiler.cpp b/src/sksl/SkSLCompiler.cpp
index 92c0c03..ab31425 100644
--- a/src/sksl/SkSLCompiler.cpp
+++ b/src/sksl/SkSLCompiler.cpp
@@ -54,6 +54,10 @@
 #include "sksl_interp.inc"
 ;
 
+static const char* SKSL_INTERP_INLINE_INCLUDE =
+#include "sksl_interp_inline.inc"
+;
+
 static const char* SKSL_VERT_INCLUDE =
 #include "sksl_vert.inc"
 ;
@@ -283,9 +287,13 @@
     this->processIncludeFile(Program::kPipelineStage_Kind, SKSL_PIPELINE_INCLUDE,
                              strlen(SKSL_PIPELINE_INCLUDE), fGpuSymbolTable, &fPipelineInclude,
                              &fPipelineSymbolTable);
+
     this->processIncludeFile(Program::kGeneric_Kind, SKSL_INTERP_INCLUDE,
                              strlen(SKSL_INTERP_INCLUDE), symbols, &fInterpreterInclude,
                              &fInterpreterSymbolTable);
+    this->processIncludeFile(Program::kGeneric_Kind, SKSL_INTERP_INLINE_INCLUDE,
+                             strlen(SKSL_INTERP_INLINE_INCLUDE), std::move(fInterpreterSymbolTable),
+                             &fInterpreterInclude, &fInterpreterSymbolTable);
     grab_intrinsics(&fInterpreterInclude, &fInterpreterIntrinsics);
     // need to hang on to the source so that FunctionDefinition.fSource pointers in this file
     // remain valid
diff --git a/src/sksl/SkSLInterpreter.h b/src/sksl/SkSLInterpreter.h
index 5e4db17..a5cf844 100644
--- a/src/sksl/SkSLInterpreter.h
+++ b/src/sksl/SkSLInterpreter.h
@@ -381,22 +381,22 @@
             DISASSEMBLE_VECTOR_BINARY(kAddF, "addF")
             DISASSEMBLE_VECTOR_BINARY(kAddI, "addI")
             DISASSEMBLE_BINARY(kAnd, "and")
-            DISASSEMBLE_BINARY(kCompareEQF, "compare eqF")
-            DISASSEMBLE_BINARY(kCompareEQI, "compare eqI")
-            DISASSEMBLE_BINARY(kCompareNEQF, "compare neqF")
-            DISASSEMBLE_BINARY(kCompareNEQI, "compare neqI")
-            DISASSEMBLE_BINARY(kCompareGTF, "compare gtF")
-            DISASSEMBLE_BINARY(kCompareGTS, "compare gtS")
-            DISASSEMBLE_BINARY(kCompareGTU, "compare gtU")
-            DISASSEMBLE_BINARY(kCompareGTEQF, "compare gteqF")
-            DISASSEMBLE_BINARY(kCompareGTEQS, "compare gteqS")
-            DISASSEMBLE_BINARY(kCompareGTEQU, "compare gteqU")
-            DISASSEMBLE_BINARY(kCompareLTF, "compare ltF")
-            DISASSEMBLE_BINARY(kCompareLTS, "compare ltS")
-            DISASSEMBLE_BINARY(kCompareLTU, "compare ltU")
-            DISASSEMBLE_BINARY(kCompareLTEQF, "compare lteqF")
-            DISASSEMBLE_BINARY(kCompareLTEQS, "compare lteqS")
-            DISASSEMBLE_BINARY(kCompareLTEQU, "compare lteqU")
+            DISASSEMBLE_VECTOR_BINARY(kCompareEQF, "compare eqF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareEQI, "compare eqI")
+            DISASSEMBLE_VECTOR_BINARY(kCompareNEQF, "compare neqF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareNEQI, "compare neqI")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTF, "compare gtF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTS, "compare gtS")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTU, "compare gtU")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTEQF, "compare gteqF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTEQS, "compare gteqS")
+            DISASSEMBLE_VECTOR_BINARY(kCompareGTEQU, "compare gteqU")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTF, "compare ltF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTS, "compare ltS")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTU, "compare ltU")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTEQF, "compare lteqF")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTEQS, "compare lteqS")
+            DISASSEMBLE_VECTOR_BINARY(kCompareLTEQU, "compare lteqU")
             DISASSEMBLE_VECTOR_BINARY(kSubtractF, "subF")
             DISASSEMBLE_VECTOR_BINARY(kSubtractI, "subI")
             DISASSEMBLE_VECTOR_BINARY(kDivideF, "divF")
@@ -553,13 +553,23 @@
             }
             case ByteCode::Instruction::kSelect: {
                 ByteCode::Register target = read<ByteCode::Register>(ip);
-                ByteCode::Register test = read<ByteCode::Register>(ip);
-                ByteCode::Register src1 = read<ByteCode::Register>(ip);
                 ByteCode::Register src2 = read<ByteCode::Register>(ip);
+                ByteCode::Register src1 = read<ByteCode::Register>(ip);
+                ByteCode::Register test = read<ByteCode::Register>(ip);
                 printf("select $%d, $%d, $%d -> %d\n", test.fIndex, src1.fIndex, src2.fIndex,
                        target.fIndex);
                 break;
             }
+            case ByteCode::Instruction::kSelectN: {
+                uint8_t count = read<uint8_t>(ip);
+                ByteCode::Register target = read<ByteCode::Register>(ip);
+                ByteCode::Register src2 = read<ByteCode::Register>(ip);
+                ByteCode::Register src1 = read<ByteCode::Register>(ip);
+                ByteCode::Register test = read<ByteCode::Register>(ip);
+                printf("select%d $%d, $%d, $%d -> %d\n", count, test.fIndex, src1.fIndex,
+                       src2.fIndex, target.fIndex);
+                break;
+            }
             DISASSEMBLE_BINARY(kShiftLeft, "shiftLeft")
             DISASSEMBLE_BINARY(kShiftRightS, "shiftRightS")
             DISASSEMBLE_BINARY(kShiftRightU, "shiftRightU")
@@ -745,21 +755,37 @@
             &&kCall,
             &&kCallExternal,
             &&kCompareEQF,
+            &&kCompareEQFN,
             &&kCompareEQI,
+            &&kCompareEQIN,
             &&kCompareNEQF,
+            &&kCompareNEQFN,
             &&kCompareNEQI,
+            &&kCompareNEQIN,
             &&kCompareGTF,
+            &&kCompareGTFN,
             &&kCompareGTS,
+            &&kCompareGTSN,
             &&kCompareGTU,
+            &&kCompareGTUN,
             &&kCompareGTEQF,
+            &&kCompareGTEQFN,
             &&kCompareGTEQS,
+            &&kCompareGTEQSN,
             &&kCompareGTEQU,
+            &&kCompareGTEQUN,
             &&kCompareLTF,
+            &&kCompareLTFN,
             &&kCompareLTS,
+            &&kCompareLTSN,
             &&kCompareLTU,
+            &&kCompareLTUN,
             &&kCompareLTEQF,
+            &&kCompareLTEQFN,
             &&kCompareLTEQS,
+            &&kCompareLTEQSN,
             &&kCompareLTEQU,
+            &&kCompareLTEQUN,
             &&kContinue,
             &&kCopy,
             &&kCos,
@@ -816,6 +842,7 @@
             &&kReturnValue,
             &&kScalarToMatrix,
             &&kSelect,
+            &&kSelectN,
             &&kShiftLeft,
             &&kShiftRightS,
             &&kShiftRightU,
@@ -856,21 +883,37 @@
         CHECK_LABEL(kCall);
         CHECK_LABEL(kCallExternal);
         CHECK_LABEL(kCompareEQF);
+        CHECK_LABEL(kCompareEQFN);
         CHECK_LABEL(kCompareEQI);
+        CHECK_LABEL(kCompareEQIN);
         CHECK_LABEL(kCompareNEQF);
+        CHECK_LABEL(kCompareNEQFN);
         CHECK_LABEL(kCompareNEQI);
+        CHECK_LABEL(kCompareNEQIN);
         CHECK_LABEL(kCompareGTF);
+        CHECK_LABEL(kCompareGTFN);
         CHECK_LABEL(kCompareGTS);
+        CHECK_LABEL(kCompareGTSN);
         CHECK_LABEL(kCompareGTU);
+        CHECK_LABEL(kCompareGTUN);
         CHECK_LABEL(kCompareGTEQF);
+        CHECK_LABEL(kCompareGTEQFN);
         CHECK_LABEL(kCompareGTEQS);
+        CHECK_LABEL(kCompareGTEQSN);
         CHECK_LABEL(kCompareGTEQU);
+        CHECK_LABEL(kCompareGTEQUN);
         CHECK_LABEL(kCompareLTF);
+        CHECK_LABEL(kCompareLTFN);
         CHECK_LABEL(kCompareLTS);
+        CHECK_LABEL(kCompareLTSN);
         CHECK_LABEL(kCompareLTU);
+        CHECK_LABEL(kCompareLTUN);
         CHECK_LABEL(kCompareLTEQF);
+        CHECK_LABEL(kCompareLTEQFN);
         CHECK_LABEL(kCompareLTEQS);
+        CHECK_LABEL(kCompareLTEQSN);
         CHECK_LABEL(kCompareLTEQU);
+        CHECK_LABEL(kCompareLTEQUN);
         CHECK_LABEL(kContinue);
         CHECK_LABEL(kCopy);
         CHECK_LABEL(kCos);
@@ -927,6 +970,7 @@
         CHECK_LABEL(kReturnValue);
         CHECK_LABEL(kScalarToMatrix);
         CHECK_LABEL(kSelect);
+        CHECK_LABEL(kSelectN);
         CHECK_LABEL(kShiftLeft);
         CHECK_LABEL(kShiftRightS);
         CHECK_LABEL(kShiftRightU);
@@ -981,22 +1025,22 @@
                 VECTOR_BINARY_OP(kAddF, fFloat, fFloat, +)
                 VECTOR_BINARY_OP(kAddI, fInt, fInt, +)
                 BINARY_OP(kAnd, fInt, fInt, &)
-                BINARY_OP(kCompareEQF, fFloat, fInt, ==)
-                BINARY_OP(kCompareEQI, fInt, fInt, ==)
-                BINARY_OP(kCompareNEQF, fFloat, fInt, !=)
-                BINARY_OP(kCompareNEQI, fInt, fInt, !=)
-                BINARY_OP(kCompareGTF, fFloat, fInt, >)
-                BINARY_OP(kCompareGTS, fInt, fInt, >)
-                BINARY_OP(kCompareGTU, fUInt, fUInt, >)
-                BINARY_OP(kCompareGTEQF, fFloat, fInt, >=)
-                BINARY_OP(kCompareGTEQS, fInt, fInt, >=)
-                BINARY_OP(kCompareGTEQU, fUInt, fUInt, >=)
-                BINARY_OP(kCompareLTF, fFloat, fInt, <)
-                BINARY_OP(kCompareLTS, fInt, fInt, <)
-                BINARY_OP(kCompareLTU, fUInt, fUInt, <)
-                BINARY_OP(kCompareLTEQF, fFloat, fInt, <=)
-                BINARY_OP(kCompareLTEQS, fInt, fInt, <=)
-                BINARY_OP(kCompareLTEQU, fUInt, fUInt, <=)
+                VECTOR_BINARY_OP(kCompareEQF, fFloat, fInt, ==)
+                VECTOR_BINARY_OP(kCompareEQI, fInt, fInt, ==)
+                VECTOR_BINARY_OP(kCompareNEQF, fFloat, fInt, !=)
+                VECTOR_BINARY_OP(kCompareNEQI, fInt, fInt, !=)
+                VECTOR_BINARY_OP(kCompareGTF, fFloat, fInt, >)
+                VECTOR_BINARY_OP(kCompareGTS, fInt, fInt, >)
+                VECTOR_BINARY_OP(kCompareGTU, fUInt, fUInt, >)
+                VECTOR_BINARY_OP(kCompareGTEQF, fFloat, fInt, >=)
+                VECTOR_BINARY_OP(kCompareGTEQS, fInt, fInt, >=)
+                VECTOR_BINARY_OP(kCompareGTEQU, fUInt, fUInt, >=)
+                VECTOR_BINARY_OP(kCompareLTF, fFloat, fInt, <)
+                VECTOR_BINARY_OP(kCompareLTS, fInt, fInt, <)
+                VECTOR_BINARY_OP(kCompareLTU, fUInt, fUInt, <)
+                VECTOR_BINARY_OP(kCompareLTEQF, fFloat, fInt, <=)
+                VECTOR_BINARY_OP(kCompareLTEQS, fInt, fInt, <=)
+                VECTOR_BINARY_OP(kCompareLTEQU, fUInt, fUInt, <=)
                 VECTOR_BINARY_OP(kSubtractF, fFloat, fFloat, -)
                 VECTOR_BINARY_OP(kSubtractI, fInt, fInt, -)
                 VECTOR_BINARY_OP(kDivideF, fFloat, fFloat, /)
@@ -1469,14 +1513,28 @@
                 }
                 LABEL(kSelect) {
                     ByteCode::Register target = read<ByteCode::Register>(&ip);
-                    ByteCode::Register test = read<ByteCode::Register>(&ip);
-                    ByteCode::Register src1 = read<ByteCode::Register>(&ip);
                     ByteCode::Register src2 = read<ByteCode::Register>(&ip);
+                    ByteCode::Register src1 = read<ByteCode::Register>(&ip);
+                    ByteCode::Register test = read<ByteCode::Register>(&ip);
                     fRegisters[target.fIndex] = skvx::if_then_else(fRegisters[test.fIndex].fInt,
                                                                    fRegisters[src1.fIndex].fFloat,
                                                                    fRegisters[src2.fIndex].fFloat);
                     NEXT();
                 }
+                LABEL(kSelectN) {
+                    uint8_t count = read<uint8_t>(&ip);
+                    ByteCode::Register target = read<ByteCode::Register>(&ip);
+                    ByteCode::Register src2 = read<ByteCode::Register>(&ip);
+                    ByteCode::Register src1 = read<ByteCode::Register>(&ip);
+                    ByteCode::Register test = read<ByteCode::Register>(&ip);
+                    for (int i = 0; i < count; ++i) {
+                        fRegisters[target.fIndex + i] =
+                                skvx::if_then_else(fRegisters[test.fIndex + i].fInt,
+                                                   fRegisters[src1.fIndex + i].fFloat,
+                                                   fRegisters[src2.fIndex + i].fFloat);
+                    }
+                    NEXT();
+                }
                 LABEL(kShiftLeft) {
                     ByteCode::Register target = read<ByteCode::Register>(&ip);
                     ByteCode::Register src = read<ByteCode::Register>(&ip);
diff --git a/src/sksl/sksl_interp.inc b/src/sksl/sksl_interp.inc
index e576f9f..140526f 100644
--- a/src/sksl/sksl_interp.inc
+++ b/src/sksl/sksl_interp.inc
@@ -4,7 +4,6 @@
 
 $genType cos($genType y);
 $genHType cos($genHType y);
-float dot($genType x, $genType y);
 float2x2 inverse(float2x2 m);
 float3x3 inverse(float3x3 m);
 float4x4 inverse(float4x4 m);
@@ -15,41 +14,40 @@
 $genType tan($genType x);
 $genHType tan($genHType x);
 
-float  degrees(float  rad) { return rad * 57.2957795; }
-float2 degrees(float2 rad) { return rad * 57.2957795; }
-float3 degrees(float3 rad) { return rad * 57.2957795; }
-float4 degrees(float4 rad) { return rad * 57.2957795; }
+$genType  mix($genType  x, $genType  y, $genBType a);
+$genHType mix($genHType x, $genHType y, $genBType a);
+$genIType mix($genIType x, $genIType y, $genBType a);
+$genBType mix($genBType x, $genBType y, $genBType a);
 
-float  radians(float  deg) { return deg * 0.0174532925; }
-float2 radians(float2 deg) { return deg * 0.0174532925; }
-float3 radians(float3 deg) { return deg * 0.0174532925; }
-float4 radians(float4 deg) { return deg * 0.0174532925; }
+$bvec lessThan($vec  x, $vec  y);
+$bvec lessThan($hvec x, $hvec y);
+$bvec lessThan($ivec x, $ivec y);
+$bvec lessThan($uvec x, $uvec y);
+$bvec lessThanEqual($vec  x, $vec  y);
+$bvec lessThanEqual($hvec x, $hvec y);
+$bvec lessThanEqual($ivec x, $ivec y);
+$bvec lessThanEqual($uvec x, $uvec y);
+$bvec greaterThan($vec  x, $vec  y);
+$bvec greaterThan($hvec x, $hvec y);
+$bvec greaterThan($ivec x, $ivec y);
+$bvec greaterThan($uvec x, $uvec y);
+$bvec greaterThanEqual($vec  x, $vec  y);
+$bvec greaterThanEqual($hvec x, $hvec y);
+$bvec greaterThanEqual($ivec x, $ivec y);
+$bvec greaterThanEqual($uvec x, $uvec y);
+$bvec equal($vec  x, $vec  y);
+$bvec equal($hvec x, $hvec y);
+$bvec equal($ivec x, $ivec y);
+$bvec equal($uvec x, $uvec y);
+$bvec equal($bvec x, $bvec y);
+$bvec notEqual($vec  x, $vec  y);
+$bvec notEqual($hvec x, $hvec y);
+$bvec notEqual($ivec x, $ivec y);
+$bvec notEqual($uvec x, $uvec y);
+$bvec notEqual($bvec x, $bvec y);
 
-float length(float2 v) { return sqrt(dot(v, v)); }
-float length(float3 v) { return sqrt(dot(v, v)); }
-float length(float4 v) { return sqrt(dot(v, v)); }
-
-float distance(float2 a, float2 b) { return length(a - b); }
-float distance(float3 a, float3 b) { return length(a - b); }
-float distance(float4 a, float4 b) { return length(a - b); }
-
-float2 normalize(float2 v) { return v / length(v); }
-float3 normalize(float3 v) { return v / length(v); }
-float4 normalize(float4 v) { return v / length(v); }
-
-float  mix(float  x, float  y, float t) { return x * (1 - t) + y * t; }
-float2 mix(float2 x, float2 y, float t) { return x * (1 - t) + y * t; }
-float3 mix(float3 x, float3 y, float t) { return x * (1 - t) + y * t; }
-float4 mix(float4 x, float4 y, float t) { return x * (1 - t) + y * t; }
-
-float2 mix(float2 x, float2 y, float2 t) { return x * (1 - t) + y * t; }
-float3 mix(float3 x, float3 y, float3 t) { return x * (1 - t) + y * t; }
-float4 mix(float4 x, float4 y, float4 t) { return x * (1 - t) + y * t; }
-
-float3 cross(float3 a, float3 b) {
-    return float3(a.y * b.z - a.z * b.y,
-                  a.z * b.x - a.x * b.z,
-                  a.x * b.y - a.y * b.x);
-}
+bool  any($bvec x);
+bool  all($bvec x);
+$bvec not($bvec x);
 
 )
diff --git a/src/sksl/sksl_interp_inline.inc b/src/sksl/sksl_interp_inline.inc
new file mode 100644
index 0000000..e23a506
--- /dev/null
+++ b/src/sksl/sksl_interp_inline.inc
@@ -0,0 +1,120 @@
+STRINGIFY(
+
+float dot(float2 a, float2 b) { return a.x*b.x + a.y*b.y; }
+float dot(float3 a, float3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
+float dot(float4 a, float4 b) { return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; }
+
+float  degrees(float  rad) { return rad * 57.2957795; }
+float2 degrees(float2 rad) { return rad * 57.2957795; }
+float3 degrees(float3 rad) { return rad * 57.2957795; }
+float4 degrees(float4 rad) { return rad * 57.2957795; }
+
+float  radians(float  deg) { return deg * 0.0174532925; }
+float2 radians(float2 deg) { return deg * 0.0174532925; }
+float3 radians(float3 deg) { return deg * 0.0174532925; }
+float4 radians(float4 deg) { return deg * 0.0174532925; }
+
+float length(float2 v) { return sqrt(dot(v, v)); }
+float length(float3 v) { return sqrt(dot(v, v)); }
+float length(float4 v) { return sqrt(dot(v, v)); }
+
+float distance(float2 a, float2 b) { return length(a - b); }
+float distance(float3 a, float3 b) { return length(a - b); }
+float distance(float4 a, float4 b) { return length(a - b); }
+
+float2 normalize(float2 v) { return v / length(v); }
+float3 normalize(float3 v) { return v / length(v); }
+float4 normalize(float4 v) { return v / length(v); }
+
+float  mix(float  x, float  y, float t) { return x * (1 - t) + y * t; }
+float2 mix(float2 x, float2 y, float t) { return x * (1 - t) + y * t; }
+float3 mix(float3 x, float3 y, float t) { return x * (1 - t) + y * t; }
+float4 mix(float4 x, float4 y, float t) { return x * (1 - t) + y * t; }
+
+float2 mix(float2 x, float2 y, float2 t) { return x * (1 - t) + y * t; }
+float3 mix(float3 x, float3 y, float3 t) { return x * (1 - t) + y * t; }
+float4 mix(float4 x, float4 y, float4 t) { return x * (1 - t) + y * t; }
+
+float3 cross(float3 a, float3 b) {
+    return float3(a.y * b.z - a.z * b.y,
+                  a.z * b.x - a.x * b.z,
+                  a.x * b.y - a.y * b.x);
+}
+
+float  min(float  x, float  y) { return mix(y, x, x < y); }
+float2 min(float2 x, float2 y) { return mix(y, x, lessThan(x, y)); }
+float3 min(float3 x, float3 y) { return mix(y, x, lessThan(x, y)); }
+float4 min(float4 x, float4 y) { return mix(y, x, lessThan(x, y)); }
+
+float2 min(float2 x, float  y) { return mix(float2(y), x, lessThan(x, float2(y))); }
+float3 min(float3 x, float  y) { return mix(float3(y), x, lessThan(x, float3(y))); }
+float4 min(float4 x, float  y) { return mix(float4(y), x, lessThan(x, float4(y))); }
+
+float  max(float  x, float  y) { return mix(y, x, x > y); }
+float2 max(float2 x, float2 y) { return mix(y, x, greaterThan(x, y)); }
+float3 max(float3 x, float3 y) { return mix(y, x, greaterThan(x, y)); }
+float4 max(float4 x, float4 y) { return mix(y, x, greaterThan(x, y)); }
+
+float2 max(float2 x, float  y) { return mix(float2(y), x, greaterThan(x, float2(y))); }
+float3 max(float3 x, float  y) { return mix(float3(y), x, greaterThan(x, float3(y))); }
+float4 max(float4 x, float  y) { return mix(float4(y), x, greaterThan(x, float4(y))); }
+
+float  clamp(float  x, float  minVal, float  maxVal) { return min(max(x, minVal), maxVal); }
+float2 clamp(float2 x, float2 minVal, float2 maxVal) { return min(max(x, minVal), maxVal); }
+float3 clamp(float3 x, float3 minVal, float3 maxVal) { return min(max(x, minVal), maxVal); }
+float4 clamp(float4 x, float4 minVal, float4 maxVal) { return min(max(x, minVal), maxVal); }
+
+float2 clamp(float2 x, float  minVal, float  maxVal) { return min(max(x, minVal), maxVal); }
+float3 clamp(float3 x, float  minVal, float  maxVal) { return min(max(x, minVal), maxVal); }
+float4 clamp(float4 x, float  minVal, float  maxVal) { return min(max(x, minVal), maxVal); }
+
+float  saturate(float  x) { return min(max(x, 0), 1); }
+float2 saturate(float2 x) { return min(max(x, 0), 1); }
+float3 saturate(float3 x) { return min(max(x, 0), 1); }
+float4 saturate(float4 x) { return min(max(x, 0), 1); }
+
+float  step(float  edge, float  x) { return mix(1, 0, x < edge); }
+float2 step(float2 edge, float2 x) { return mix(float2(1), float2(0), lessThan(x, edge)); }
+float3 step(float3 edge, float3 x) { return mix(float3(1), float3(0), lessThan(x, edge)); }
+float4 step(float4 edge, float4 x) { return mix(float4(1), float4(0), lessThan(x, edge)); }
+
+float2 step(float  edge, float2 x) { return mix(float2(1), float2(0), lessThan(x, float2(edge))); }
+float3 step(float  edge, float3 x) { return mix(float3(1), float3(0), lessThan(x, float3(edge))); }
+float4 step(float  edge, float4 x) { return mix(float4(1), float4(0), lessThan(x, float4(edge))); }
+
+float  smoothstep(float  edge0, float  edge1, float  x) {
+    float  t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float2 smoothstep(float2 edge0, float2 edge1, float2 x) {
+    float2 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float3 smoothstep(float3 edge0, float3 edge1, float3 x) {
+    float3 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float4 smoothstep(float4 edge0, float4 edge1, float4 x) {
+    float4 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float2 smoothstep(float  edge0, float  edge1, float2 x) {
+    float2 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float3 smoothstep(float  edge0, float  edge1, float3 x) {
+    float3 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+float4 smoothstep(float  edge0, float  edge1, float4 x) {
+    float4 t = saturate((x - edge0) / (edge1 - edge0));
+    return t * t * (3.0 - 2.0 * t);
+}
+
+)
diff --git a/tests/SkSLInterpreterTest.cpp b/tests/SkSLInterpreterTest.cpp
index f4bf75c..2bc3019 100644
--- a/tests/SkSLInterpreterTest.cpp
+++ b/tests/SkSLInterpreterTest.cpp
@@ -796,8 +796,6 @@
         "float main(float x) { return sub(sqr(x), x); }\n"
 
         // Different signatures
-        "float dot(float2 a, float2 b) { return a.x*b.x + a.y*b.y; }\n"
-        "float dot(float3 a, float3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n"
         "float dot3_test(float x) { return dot(float3(x, x + 1, x + 2), float3(1, -1, 2)); }\n"
         "float dot2_test(float x) { return dot(float2(x, x + 1), float2(1, -1)); }\n";
 
@@ -959,6 +957,29 @@
           expectedVector[] = { 3.0f, 4.0f, 5.0f, 6.0f };
     test(r, "float4 main(float4 x, float4 y) { return mix(x, y, 0.5); }", valueVectors,
          expectedVector);
+
+    auto expect = [&expectedVector](float a, float b, float c, float d) {
+        expectedVector[0] = a;
+        expectedVector[1] = b;
+        expectedVector[2] = c;
+        expectedVector[3] = d;
+    };
+
+    expect(1, 2, 7, 8);
+    test(r, "float4 main(float4 x, float4 y) { return mix(x, y, greaterThan(x, float4(2.5))); }",
+         valueVectors, expectedVector);
+
+    expect(5, 6, 3, 4);
+    test(r, "float4 main(float4 x, float4 y) { return mix(x, y, lessThan(x, float4(2.5))); }",
+         valueVectors, expectedVector);
+
+    expect(1, 2, 7, 8);
+    test(r, "float4 main(float4 x, float4 y) { return mix(x, y, greaterThanEqual(x, float4(3))); }",
+         valueVectors, expectedVector);
+
+    expect(5, 6, 3, 4);
+    test(r, "float4 main(float4 x, float4 y) { return mix(x, y, lessThanEqual(x, float4(2))); }",
+         valueVectors, expectedVector);
 }
 
 DEF_TEST(SkSLInterpreterCross, r) {
diff --git a/tools/gpu/d3d/D3DTestContext.cpp b/tools/gpu/d3d/D3DTestContext.cpp
index 26ea375..ba00a36 100644
--- a/tools/gpu/d3d/D3DTestContext.cpp
+++ b/tools/gpu/d3d/D3DTestContext.cpp
@@ -38,9 +38,6 @@
 
     void testAbandon() override {}
 
-    // There is really nothing to here since we don't own any unqueued command buffers here.
-    void submit() override {}
-
     void finish() override {}
 
     sk_sp<GrContext> makeGrContext(const GrContextOptions& options) override {
@@ -63,7 +60,6 @@
     void onPlatformMakeNotCurrent() const override {}
     void onPlatformMakeCurrent() const override {}
     std::function<void()> onPlatformGetAutoContextRestore() const override  { return nullptr; }
-    void onPlatformSwapBuffers() const override {}
 
     typedef sk_gpu_test::D3DTestContext INHERITED;
 };
diff --git a/tools/gpu/d3d/D3DTestUtils.cpp b/tools/gpu/d3d/D3DTestUtils.cpp
index b750c4b..db52405 100644
--- a/tools/gpu/d3d/D3DTestUtils.cpp
+++ b/tools/gpu/d3d/D3DTestUtils.cpp
@@ -9,7 +9,6 @@
 
 #ifdef SK_DIRECT3D
 #include <d3d12sdklayers.h>
-#include <dxgi1_4.h>
 
 #include "include/gpu/d3d/GrD3DBackendContext.h"
 
@@ -73,6 +72,7 @@
         return false;
     }
 
+    ctx->fAdapter = hardwareAdapter;
     ctx->fDevice = device;
     ctx->fQueue = queue;
     // TODO: set up protected memory
diff --git a/tools/gpu/dawn/DawnTestContext.cpp b/tools/gpu/dawn/DawnTestContext.cpp
index 6ff998f..c236697 100644
--- a/tools/gpu/dawn/DawnTestContext.cpp
+++ b/tools/gpu/dawn/DawnTestContext.cpp
@@ -158,7 +158,6 @@
     void onPlatformMakeNotCurrent() const override {}
     void onPlatformMakeCurrent() const override {}
     std::function<void()> onPlatformGetAutoContextRestore() const override  { return nullptr; }
-    void onPlatformSwapBuffers() const override {}
     std::unique_ptr<dawn_native::Instance> fInstance;
 
     typedef sk_gpu_test::DawnTestContext INHERITED;