Add support for pixel local storage (PLS) path rendering

BUG=skia:3555
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1541903002

Committed: https://skia.googlesource.com/skia/+/7df3f5e127f8016d17b637cc48a6a4718f1a6822

Review URL: https://codereview.chromium.org/1541903002
diff --git a/src/gpu/GrDrawTarget.cpp b/src/gpu/GrDrawTarget.cpp
index e2e188e..b9dc794 100644
--- a/src/gpu/GrDrawTarget.cpp
+++ b/src/gpu/GrDrawTarget.cpp
@@ -20,6 +20,7 @@
 #include "GrSurfacePriv.h"
 #include "GrTexture.h"
 #include "GrVertexBuffer.h"
+#include "gl/GrGLRenderTarget.h"
 
 #include "SkStrokeRec.h"
 
@@ -212,6 +213,8 @@
         }
         fBatches[i]->draw(flushState);
     }
+
+    fGpu->performFlushWorkaround();
 }
 
 void GrDrawTarget::reset() {
@@ -487,6 +490,31 @@
     args.fCaps = this->caps();
     args.fScissor = scissor;
     batch->getPipelineOptimizations(&args.fOpts);
+    GrScissorState finalScissor;
+    if (args.fOpts.fOverrides.fUsePLSDstRead) {
+        GrRenderTarget* rt = pipelineBuilder->getRenderTarget();
+        GrGLIRect viewport;
+        viewport.fLeft = 0;
+        viewport.fBottom = 0;
+        viewport.fWidth = rt->width();
+        viewport.fHeight = rt->height();
+        SkIRect ibounds;
+        ibounds.fLeft = SkTPin(SkScalarFloorToInt(batch->bounds().fLeft), viewport.fLeft, 
+                              viewport.fWidth);
+        ibounds.fTop = SkTPin(SkScalarFloorToInt(batch->bounds().fTop), viewport.fBottom, 
+                             viewport.fHeight);
+        ibounds.fRight = SkTPin(SkScalarCeilToInt(batch->bounds().fRight), viewport.fLeft, 
+                               viewport.fWidth);
+        ibounds.fBottom = SkTPin(SkScalarCeilToInt(batch->bounds().fBottom), viewport.fBottom, 
+                                viewport.fHeight);
+        if (scissor != nullptr && scissor->enabled()) {
+            if (!ibounds.intersect(scissor->rect())) {
+                ibounds = scissor->rect();
+            }
+        }
+        finalScissor.set(ibounds);
+        args.fScissor = &finalScissor;
+    }
     args.fOpts.fColorPOI.completeCalculations(pipelineBuilder->fColorFragmentProcessors.begin(),
                                               pipelineBuilder->numColorFragmentProcessors());
     args.fOpts.fCoveragePOI.completeCalculations(
diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h
index 4d041d6..50d0bd5 100644
--- a/src/gpu/GrGeometryProcessor.h
+++ b/src/gpu/GrGeometryProcessor.h
@@ -20,8 +20,7 @@
 class GrGeometryProcessor : public GrPrimitiveProcessor {
 public:
     GrGeometryProcessor()
-        : INHERITED(false)
-        , fWillUseGeoShader(false)
+        : fWillUseGeoShader(false)
         , fLocalCoordsType(kUnused_LocalCoordsType) {}
 
     bool willUseGeoShader() const override { return fWillUseGeoShader; }
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 43a75ae..ab8e5cb 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -366,6 +366,9 @@
     this->onResolveRenderTarget(target);
 }
 
+void GrGpu::performFlushWorkaround() {
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 void GrGpu::draw(const DrawArgs& args, const GrVertices& vertices) {
@@ -381,3 +384,4 @@
         fStats.incNumDraws();
     } while ((verts = iter.next()));
 }
+
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index bfcaeae..2140349 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -349,6 +349,9 @@
 
     void draw(const DrawArgs&, const GrVertices&);
 
+    // Called by drawtarget when flushing. Provides a hook for working around an ARM PLS driver bug.
+    virtual void performFlushWorkaround();
+
     ///////////////////////////////////////////////////////////////////////////
     // Debugging and Stats
 
diff --git a/src/gpu/GrOvalRenderer.cpp b/src/gpu/GrOvalRenderer.cpp
index 6cb203c..f233fcb 100644
--- a/src/gpu/GrOvalRenderer.cpp
+++ b/src/gpu/GrOvalRenderer.cpp
@@ -677,7 +677,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
@@ -897,7 +896,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
@@ -1169,7 +1167,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
@@ -1527,7 +1524,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
@@ -1707,7 +1703,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/GrPLSGeometryProcessor.h b/src/gpu/GrPLSGeometryProcessor.h
new file mode 100644
index 0000000..5c1d6ee
--- /dev/null
+++ b/src/gpu/GrPLSGeometryProcessor.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrPLSGeometryProcessor_DEFINED
+#define GrPLSGeometryProcessor_DEFINED
+
+#include "GrGeometryProcessor.h"
+
+/**
+ * A GrGeometryProcessor that reports kDraw_GrPixelLocalStorageState (its draws update pixel
+ * local storage) and carries a bounds rect accessed through setBounds() / getBounds().
+ */
+class GrPLSGeometryProcessor : public GrGeometryProcessor {
+public:
+    GrPixelLocalStorageState getPixelLocalStorageState() const override {
+        return GrPixelLocalStorageState::kDraw_GrPixelLocalStorageState;
+    }
+
+    const SkRect& getBounds() const {
+        return fBounds;
+    }
+
+    void setBounds(const SkRect& bounds) {  // const ref: the rect is copied, never modified
+        fBounds = bounds;
+    }
+
+private:
+    SkRect fBounds;  // no initializer here; only meaningful once setBounds() has been called
+};
+
+#endif
diff --git a/src/gpu/GrPathProcessor.cpp b/src/gpu/GrPathProcessor.cpp
index f1faa7d..6ecad59 100644
--- a/src/gpu/GrPathProcessor.cpp
+++ b/src/gpu/GrPathProcessor.cpp
@@ -123,8 +123,7 @@
                                  const GrXPOverridesForBatch& overrides,
                                  const SkMatrix& viewMatrix,
                                  const SkMatrix& localMatrix)
-    : INHERITED(true)
-    , fColor(color)
+    : fColor(color)
     , fViewMatrix(viewMatrix)
     , fLocalMatrix(localMatrix)
     , fOverrides(overrides) {
diff --git a/src/gpu/GrPathProcessor.h b/src/gpu/GrPathProcessor.h
index 831770c..fdd8498 100644
--- a/src/gpu/GrPathProcessor.h
+++ b/src/gpu/GrPathProcessor.h
@@ -40,6 +40,8 @@
 
     const GrXPOverridesForBatch& overrides() const { return fOverrides; }
 
+    bool isPathRendering() const override { return true; }  // path processors always are
+
 private:
     GrPathProcessor(GrColor color, const GrXPOverridesForBatch& overrides,
                     const SkMatrix& viewMatrix, const SkMatrix& localMatrix);
diff --git a/src/gpu/GrPathRendererChain.cpp b/src/gpu/GrPathRendererChain.cpp
index e13e6c6..d9717d5 100644
--- a/src/gpu/GrPathRendererChain.cpp
+++ b/src/gpu/GrPathRendererChain.cpp
@@ -10,6 +10,8 @@
 #include "GrPathRendererChain.h"
 
 #include "GrCaps.h"
+#include "gl/GrGLCaps.h"
+#include "glsl/GrGLSLCaps.h"
 #include "GrContext.h"
 #include "GrGpu.h"
 
@@ -21,6 +23,7 @@
 #include "batches/GrDefaultPathRenderer.h"
 #include "batches/GrStencilAndCoverPathRenderer.h"
 #include "batches/GrTessellatingPathRenderer.h"
+#include "batches/GrPLSPathRenderer.h"
 
 GrPathRendererChain::GrPathRendererChain(GrContext* context) {
     const GrCaps& caps = *context->caps();
@@ -34,6 +37,9 @@
     this->addPathRenderer(new GrAAHairLinePathRenderer)->unref();
     this->addPathRenderer(new GrAAConvexPathRenderer)->unref();
     this->addPathRenderer(new GrAALinearizingConvexPathRenderer)->unref();
+    if (caps.shaderCaps()->plsPathRenderingSupport()) {
+        this->addPathRenderer(new GrPLSPathRenderer)->unref();
+    }
     this->addPathRenderer(new GrAADistanceFieldPathRenderer)->unref();
     this->addPathRenderer(new GrDefaultPathRenderer(caps.twoSidedStencilSupport(),
                                                     caps.stencilWrapOpsSupport()))->unref();
diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h
index 467200a..c1e946d 100644
--- a/src/gpu/GrPrimitiveProcessor.h
+++ b/src/gpu/GrPrimitiveProcessor.h
@@ -47,6 +47,16 @@
 
 struct GrInitInvariantOutput;
 
+// Describes the state of pixel local storage with respect to the current draw. 
+enum GrPixelLocalStorageState {
+    // The draw is actively updating PLS.
+    kDraw_GrPixelLocalStorageState,
+    // The draw is a "finish" operation which is reading from PLS and writing color.
+    kFinish_GrPixelLocalStorageState,
+    // The draw does not use PLS.
+    kDisabled_GrPixelLocalStorageState
+};
+
 /*
  * This class allows the GrPipeline to communicate information about the pipeline to a
  * GrBatch which should be forwarded to the GrPrimitiveProcessor(s) created by the batch.
@@ -199,7 +209,7 @@
         the object. */
     virtual GrGLSLPrimitiveProcessor* createGLSLInstance(const GrGLSLCaps& caps) const = 0;
 
-    bool isPathRendering() const { return fIsPathRendering; }
+    virtual bool isPathRendering() const { return false; }
 
     /**
      * No Local Coord Transformation is needed in the shader, instead transformed local coords will
@@ -207,11 +217,19 @@
      */
     virtual bool hasTransformedLocalCoords() const = 0;
 
+    virtual GrPixelLocalStorageState getPixelLocalStorageState() const { 
+        return kDisabled_GrPixelLocalStorageState;
+    }
+
+    /**
+     * If non-null, overrides the dest color returned by GrGLSLFragmentShaderBuilder::dstColor().
+     */
+    virtual const char* getDestColorOverride() const { return nullptr; }
+    
 protected:
-    GrPrimitiveProcessor(bool isPathRendering)
+    GrPrimitiveProcessor()
         : fNumAttribs(0)
-        , fVertexStride(0)
-        , fIsPathRendering(isPathRendering) {}
+        , fVertexStride(0) {}
 
     Attribute fAttribs[kMaxVertexAttribs];
     int fNumAttribs;
@@ -221,8 +239,6 @@
     void notifyRefCntIsZero() const final {};
     virtual bool hasExplicitLocalCoords() const = 0;
 
-    bool fIsPathRendering;
-
     typedef GrProcessor INHERITED;
 };
 
diff --git a/src/gpu/GrXferProcessor.cpp b/src/gpu/GrXferProcessor.cpp
index b07b972..01534a3 100644
--- a/src/gpu/GrXferProcessor.cpp
+++ b/src/gpu/GrXferProcessor.cpp
@@ -217,3 +217,10 @@
     return (this->willReadDstColor(caps, optimizations, hasMixedSamples) &&
             !caps.shaderCaps()->dstReadInShaderSupport());
 }
+
+bool GrXPFactory::willReadDstColor(const GrCaps& caps,
+                                   const GrPipelineOptimizations& optimizations,
+                                   bool hasMixedSamples) const {
+    const bool plsDstRead = optimizations.fOverrides.fUsePLSDstRead;  // PLS forces a dst read
+    return plsDstRead || this->onWillReadDstColor(caps, optimizations, hasMixedSamples);
+}
diff --git a/src/gpu/batches/GrAAConvexPathRenderer.cpp b/src/gpu/batches/GrAAConvexPathRenderer.cpp
index 9fde34c..4fd3c6f 100644
--- a/src/gpu/batches/GrAAConvexPathRenderer.cpp
+++ b/src/gpu/batches/GrAAConvexPathRenderer.cpp
@@ -761,7 +761,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp b/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
index e94cd59..d48649c 100644
--- a/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
+++ b/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
@@ -158,7 +158,6 @@
                                       GrBatchToXPOverrides* overrides) const override {
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/batches/GrAAHairLinePathRenderer.cpp b/src/gpu/batches/GrAAHairLinePathRenderer.cpp
index 6b7b308..97a779e 100644
--- a/src/gpu/batches/GrAAHairLinePathRenderer.cpp
+++ b/src/gpu/batches/GrAAHairLinePathRenderer.cpp
@@ -692,7 +692,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = true;
     }
 
 private:
diff --git a/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp b/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
index c830509..13267fd 100644
--- a/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
+++ b/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
@@ -140,7 +140,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/batches/GrAAStrokeRectBatch.cpp b/src/gpu/batches/GrAAStrokeRectBatch.cpp
index 21c55f6..8cb1c24 100644
--- a/src/gpu/batches/GrAAStrokeRectBatch.cpp
+++ b/src/gpu/batches/GrAAStrokeRectBatch.cpp
@@ -68,7 +68,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
     SkSTArray<1, Geometry, true>* geoData() { return &fGeoData; }
diff --git a/src/gpu/batches/GrDefaultPathRenderer.cpp b/src/gpu/batches/GrDefaultPathRenderer.cpp
index c39a530..a5862c6 100644
--- a/src/gpu/batches/GrDefaultPathRenderer.cpp
+++ b/src/gpu/batches/GrDefaultPathRenderer.cpp
@@ -234,7 +234,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setKnownSingleComponent(this->coverage());
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/batches/GrDrawAtlasBatch.h b/src/gpu/batches/GrDrawAtlasBatch.h
index 4e89523..b70f190 100644
--- a/src/gpu/batches/GrDrawAtlasBatch.h
+++ b/src/gpu/batches/GrDrawAtlasBatch.h
@@ -39,7 +39,6 @@
             color->setKnownFourComponents(fGeoData[0].fColor);
         }
         coverage->setKnownSingleComponent(0xff);
-        overrides->fUsePLSDstRead = false;
     }
 
     SkSTArray<1, Geometry, true>* geoData() { return &fGeoData; }
diff --git a/src/gpu/batches/GrDrawBatch.cpp b/src/gpu/batches/GrDrawBatch.cpp
index 4253923..78bbd3b 100644
--- a/src/gpu/batches/GrDrawBatch.cpp
+++ b/src/gpu/batches/GrDrawBatch.cpp
@@ -18,6 +18,7 @@
 void GrDrawBatch::getPipelineOptimizations(GrPipelineOptimizations* opt) const {
     GrInitInvariantOutput color;
     GrInitInvariantOutput coverage;
+    opt->fOverrides.fUsePLSDstRead = false;
     this->computePipelineOptimizations(&color, &coverage, &opt->fOverrides);
     opt->fColorPOI.initUsingInvariantOutput(color);
     opt->fCoveragePOI.initUsingInvariantOutput(coverage);
diff --git a/src/gpu/batches/GrDrawPathBatch.h b/src/gpu/batches/GrDrawPathBatch.h
index 0b24fe0..618bc5e 100644
--- a/src/gpu/batches/GrDrawPathBatch.h
+++ b/src/gpu/batches/GrDrawPathBatch.h
@@ -24,7 +24,6 @@
                                       GrBatchToXPOverrides* overrides) const override {
         color->setKnownFourComponents(fColor);
         coverage->setKnownSingleComponent(0xff);
-        overrides->fUsePLSDstRead = false;
     }
 
     GrPathRendering::FillType fillType() const { return fFillType; }
diff --git a/src/gpu/batches/GrDrawVerticesBatch.cpp b/src/gpu/batches/GrDrawVerticesBatch.cpp
index cfbd24c..156e4b3 100644
--- a/src/gpu/batches/GrDrawVerticesBatch.cpp
+++ b/src/gpu/batches/GrDrawVerticesBatch.cpp
@@ -75,7 +75,6 @@
         color->setKnownFourComponents(fGeoData[0].fColor);
     }
     coverage->setKnownSingleComponent(0xff);
-    overrides->fUsePLSDstRead = false;
 }
 
 void GrDrawVerticesBatch::initBatchTracker(const GrXPOverridesForBatch& overrides) {
diff --git a/src/gpu/batches/GrNinePatch.cpp b/src/gpu/batches/GrNinePatch.cpp
index 59040cb..bbd2801 100644
--- a/src/gpu/batches/GrNinePatch.cpp
+++ b/src/gpu/batches/GrNinePatch.cpp
@@ -79,7 +79,6 @@
                                       GrBatchToXPOverrides* overrides) const override {
         color->setUnknownFourComponents();
         coverage->setKnownSingleComponent(0xff);
-        overrides->fUsePLSDstRead = false;
     }
 
     SkSTArray<1, Geometry, true>* geoData() { return &fGeoData; }
diff --git a/src/gpu/batches/GrNonAAStrokeRectBatch.cpp b/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
index d036b90..6766de2 100644
--- a/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
+++ b/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
@@ -60,7 +60,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setKnownSingleComponent(0xff);
-        overrides->fUsePLSDstRead = false;
     }
 
     void append(GrColor color, const SkMatrix& viewMatrix, const SkRect& rect,
diff --git a/src/gpu/batches/GrPLSPathRenderer.cpp b/src/gpu/batches/GrPLSPathRenderer.cpp
new file mode 100644
index 0000000..1f61192
--- /dev/null
+++ b/src/gpu/batches/GrPLSPathRenderer.cpp
@@ -0,0 +1,1012 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrPLSPathRenderer.h"
+
+#include "SkChunkAlloc.h"
+#include "SkGeometry.h"
+#include "SkPathPriv.h"
+#include "SkString.h"
+#include "SkTSort.h"
+#include "SkTraceEvent.h"
+#include "GrBatchFlushState.h"
+#include "GrBatchTest.h"
+#include "GrCaps.h"
+#include "GrContext.h"
+#include "GrDefaultGeoProcFactory.h"
+#include "GrPLSGeometryProcessor.h"
+#include "GrInvariantOutput.h"
+#include "GrPathUtils.h"
+#include "GrProcessor.h"
+#include "GrPipelineBuilder.h"
+#include "GrStrokeInfo.h"
+#include "GrTessellator.h"
+#include "batches/GrVertexBatch.h"
+#include "glsl/GrGLSLGeometryProcessor.h"
+#include "gl/builders/GrGLProgramBuilder.h"
+#include "glsl/GrGLSLPLSPathRendering.h"
+
+GrPLSPathRenderer::GrPLSPathRenderer() {
+}
+
+struct PLSVertex {
+    SkPoint  fPos;
+    // for triangles, these are the three triangle vertices
+    // for quads, vert1 is the texture UV coords, and vert2 and vert3 are the line segment 
+    // comprising the flat edge of the quad
+    SkPoint  fVert1;
+    SkPoint  fVert2;
+    SkPoint  fVert3;
+    int fWinding;
+};
+typedef SkTArray<PLSVertex, true> PLSVertices;
+
+typedef SkTArray<SkPoint, true> FinishVertices;
+
+static const float kCubicTolerance = 0.5f;
+static const float kConicTolerance = 0.5f;
+
+static const float kBloatSize = 1.0f;
+
+static const float kBloatLimit = 640000.0f;
+
+#define kQuadNumVertices 5
+static void add_quad(SkPoint pts[3], PLSVertices& vertices) {  // appends a 3-triangle fan
+    SkPoint normal = SkPoint::Make(pts[0].fY - pts[2].fY, 
+                                   pts[2].fX - pts[0].fX);  // perpendicular to pts[0]->pts[2]
+    normal.setLength(kBloatSize);
+    SkScalar cross = (pts[1] - pts[0]).cross(pts[2] - pts[0]);
+    if (cross < 0) {
+        normal = -normal;  // make the normal point away from the control point pts[1]
+    }
+    PLSVertex quad[kQuadNumVertices];  // pentagon: the control points offset by +/- normal
+    quad[0].fPos = pts[0] + normal;
+    quad[1].fPos = pts[0] - normal;
+    quad[2].fPos = pts[1] - normal;
+    quad[3].fPos = pts[2] - normal;
+    quad[4].fPos = pts[2] + normal;
+    for (int i = 0; i < kQuadNumVertices; i++) {  // same winding/edge data on all 5 vertices
+        quad[i].fWinding = cross < 0 ? 1 : -1;
+        if (cross > 0.0) {  // order of the flat edge's endpoints depends on the sign of cross
+            quad[i].fVert2 = pts[0];
+            quad[i].fVert3 = pts[2];
+        }
+        else {
+            quad[i].fVert2 = pts[2];
+            quad[i].fVert3 = pts[0];
+        }
+    }
+    GrPathUtils::QuadUVMatrix DevToUV(pts);  // presumably fills fVert1 (offset sizeof(SkPoint))
+    DevToUV.apply<kQuadNumVertices, sizeof(PLSVertex), sizeof(SkPoint)>(quad);
+    for (int i = 2; i < kQuadNumVertices; i++) {  // fan around quad[0]: (0,1,2) (0,2,3) (0,3,4)
+        vertices.push_back(quad[0]);
+        vertices.push_back(quad[i - 1]);
+        vertices.push_back(quad[i]);
+    }
+}
+
+/* Used by bloat_tri; offsets *p1 along line1 + line2. Returns false if the offset is too big. */
+static bool outset(SkPoint* p1, SkPoint line1, SkPoint line2) {
+    // A dot product is unchanged by rotating both vectors 90 degrees, so the dot product of
+    // the two edge normals is computed directly from the two line vectors.
+    SkScalar dotProd = line1.fY * line2.fY + line1.fX * line2.fX;
+    SkScalar lengthSq = 1.0f / ((1.0f - dotProd) / 2.0f);  // 1/sin^2(angle/2) for unit inputs
+    if (lengthSq > kBloatLimit) {
+        return false;  // lines are nearly parallel: the offset point would be too far away
+    }
+    SkPoint bisector = line1 + line2;
+    bisector.setLength(SkScalarSqrt(lengthSq) * kBloatSize);
+    *p1 += bisector;
+    return true;
+}
+
+/* Bloats pts in place by a kBloatSize border; on failure returns false and leaves pts as is. */
+static bool bloat_tri(SkPoint pts[3]) {
+    SkPoint line1 = pts[0] - pts[1];  // line1..line3: edge directions, normalized below
+    line1.normalize();
+    SkPoint line2 = pts[0] - pts[2];
+    line2.normalize();
+    SkPoint line3 = pts[1] - pts[2];
+    line3.normalize();
+
+    SkPoint result[3];  // scratch copy, so pts is only overwritten if every corner succeeds
+    result[0] = pts[0];
+    if (!outset(&result[0], line1, line2)) {  // args: directions from the other two corners
+        return false;
+    }
+    result[1] = pts[1];
+    if (!outset(&result[1], -line1, line3)) {
+        return false;
+    }
+    result[2] = pts[2];
+    if (!outset(&result[2], -line3, -line2)) {
+        return false;
+    }
+    pts[0] = result[0];
+    pts[1] = result[1];
+    pts[2] = result[2];
+    return true;
+}
+
+static bool get_geometry(const SkPath& path, const SkMatrix& m, PLSVertices& triVertices,
+                         PLSVertices& quadVertices, GrResourceProvider* resourceProvider,
+                         SkRect bounds) {  // true if tris were made or quadVertices is non-empty
+    SkScalar screenSpaceTol = GrPathUtils::kDefaultTolerance;
+    SkScalar tol = GrPathUtils::scaleToleranceToSrc(screenSpaceTol, m, bounds);
+    int contourCnt;
+    int maxPts = GrPathUtils::worstCasePointCount(path, &contourCnt, tol);
+    if (maxPts <= 0) {
+        return false;
+    }
+    SkPath linesOnlyPath;  // path mapped through m, with every curve replaced by line segments
+    linesOnlyPath.setFillType(path.getFillType());
+    SkSTArray<15, SkPoint, true> quadPoints;  // mapped control points, three per quad
+    SkPathPriv::FirstDirection dir = SkPathPriv::FirstDirection::kUnknown_FirstDirection;
+    SkPath::Iter iter(path, true);
+    bool done = false;
+    while (!done) {
+        SkPoint pts[4];
+        SkPath::Verb verb = iter.next(pts);
+        switch (verb) {
+            case SkPath::kMove_Verb:
+                SkASSERT(quadPoints.count() % 3 == 0);
+                for (int i = 0; i < quadPoints.count(); i += 3) {  // flush quads gathered so far
+                    add_quad(&quadPoints[i], quadVertices);
+                }
+                quadPoints.reset();
+                m.mapPoints(&pts[0], 1);
+                linesOnlyPath.moveTo(pts[0]);
+                break;
+            case SkPath::kLine_Verb:
+                m.mapPoints(&pts[1], 1);
+                linesOnlyPath.lineTo(pts[1]);
+                break;
+            case SkPath::kQuad_Verb:
+                m.mapPoints(pts, 3);
+                linesOnlyPath.lineTo(pts[2]);  // the line path only gets the quad's end point
+                quadPoints.push_back(pts[0]);
+                quadPoints.push_back(pts[1]);
+                quadPoints.push_back(pts[2]);
+                break;
+            case SkPath::kCubic_Verb: {
+                m.mapPoints(pts, 4);
+                SkSTArray<15, SkPoint, true> quads;  // consumed below three points per quad
+                GrPathUtils::convertCubicToQuads(pts, kCubicTolerance, false, dir, &quads);
+                int count = quads.count();
+                for (int q = 0; q < count; q += 3) {
+                    linesOnlyPath.lineTo(quads[q + 2]);
+                    quadPoints.push_back(quads[q]);
+                    quadPoints.push_back(quads[q + 1]);
+                    quadPoints.push_back(quads[q + 2]);
+                }
+                break;
+            }
+            case SkPath::kConic_Verb: {
+                m.mapPoints(pts, 3);
+                SkScalar weight = iter.conicWeight();
+                SkAutoConicToQuads converter;
+                const SkPoint* quads = converter.computeQuads(pts, weight, kConicTolerance);
+                int count = converter.countQuads();
+                for (int i = 0; i < count; ++i) {  // quad i is read from quads[2i .. 2i + 2]
+                    linesOnlyPath.lineTo(quads[2 * i + 2]);
+                    quadPoints.push_back(quads[2 * i]);
+                    quadPoints.push_back(quads[2 * i + 1]);
+                    quadPoints.push_back(quads[2 * i + 2]);
+                }
+                break;
+            }
+            case SkPath::kClose_Verb:
+                linesOnlyPath.close();
+                break;
+            case SkPath::kDone_Verb:
+                done = true;
+                break;
+            default: SkASSERT(false);
+        }
+    }
+    SkASSERT(quadPoints.count() % 3 == 0);
+    for (int i = 0; i < quadPoints.count(); i += 3) {  // flush the quads of the last contour
+        add_quad(&quadPoints[i], quadVertices);
+    }
+
+    static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey key;  // NOTE(review): this key is built but never used in this function
+    GrUniqueKey::Builder builder(&key, kDomain, 2);
+    builder[0] = path.getGenerationID();
+    builder[1] = path.getFillType();
+    builder.finish();
+    GrTessellator::WindingVertex* windingVertices = nullptr;
+    int triVertexCount = GrTessellator::PathToVertices(linesOnlyPath, 0, bounds, &windingVertices);
+    if (triVertexCount > 0) {
+        for (int i = 0; i < triVertexCount; i += 3) {
+            SkPoint p1 = windingVertices[i].fPos;
+            SkPoint p2 = windingVertices[i + 1].fPos;
+            SkPoint p3 = windingVertices[i + 2].fPos;
+            int winding = windingVertices[i].fWinding;
+            SkASSERT(windingVertices[i + 1].fWinding == winding);
+            SkASSERT(windingVertices[i + 2].fWinding == winding);
+            SkScalar cross = (p2 - p1).cross(p3 - p1);
+            SkPoint bloated[3] = { p1, p2, p3 };  // keeps the tessellator's vertex order
+            if (cross < 0.0f) {
+                SkTSwap(p1, p3);  // reverse the reference triangle's order when cross is negative
+            }
+            if (bloat_tri(bloated)) {
+                triVertices.push_back({ bloated[0], p1, p2, p3, winding });
+                triVertices.push_back({ bloated[1], p1, p2, p3, winding });
+                triVertices.push_back({ bloated[2], p1, p2, p3, winding });
+            }
+            else {  // bloat failed: cover the triangle's bounds, outset by 1, with two tris
+                SkScalar minX = SkTMin(p1.fX, SkTMin(p2.fX, p3.fX)) - 1.0f;
+                SkScalar minY = SkTMin(p1.fY, SkTMin(p2.fY, p3.fY)) - 1.0f;
+                SkScalar maxX = SkTMax(p1.fX, SkTMax(p2.fX, p3.fX)) + 1.0f;
+                SkScalar maxY = SkTMax(p1.fY, SkTMax(p2.fY, p3.fY)) + 1.0f;
+                triVertices.push_back({ { minX, minY }, p1, p2, p3, winding });
+                triVertices.push_back({ { maxX, minY }, p1, p2, p3, winding });
+                triVertices.push_back({ { minX, maxY }, p1, p2, p3, winding });
+                triVertices.push_back({ { maxX, minY }, p1, p2, p3, winding });
+                triVertices.push_back({ { maxX, maxY }, p1, p2, p3, winding });
+                triVertices.push_back({ { minX, maxY }, p1, p2, p3, winding });
+            }
+        }
+        delete[] windingVertices;
+    }
+    return triVertexCount > 0 || quadVertices.count() > 0;
+}
+
+class PLSAATriangleEffect : public GrPLSGeometryProcessor {
+public:
+
+    static GrPLSGeometryProcessor* Create(const SkMatrix& localMatrix,
+                                          bool usesLocalCoords) {
+        return new PLSAATriangleEffect(localMatrix, usesLocalCoords);
+    }
+
+    virtual ~PLSAATriangleEffect() {}
+
+    const char* name() const override { return "PLSAATriangle"; }
+
+    const Attribute* inPosition() const { return fInPosition; }
+    const Attribute* inVertex1() const { return fInVertex1; }
+    const Attribute* inVertex2() const { return fInVertex2; }
+    const Attribute* inVertex3() const { return fInVertex3; }
+    const Attribute* inWindings() const { return fInWindings; }
+    const SkMatrix& localMatrix() const { return fLocalMatrix; }
+    bool usesLocalCoords() const { return fUsesLocalCoords; }
+
+    class GLSLProcessor : public GrGLSLGeometryProcessor {
+    public:
+        GLSLProcessor(const GrGeometryProcessor&) {}
+
+        void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
+            const PLSAATriangleEffect& te = args.fGP.cast<PLSAATriangleEffect>();
+            GrGLSLVertexBuilder* vsBuilder = args.fVertBuilder;
+            GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+            GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
+
+            varyingHandler->emitAttributes(te);
+
+            this->setupPosition(vsBuilder, gpArgs, te.inPosition()->fName);
+
+            GrGLSLVertToFrag v1(kVec2f_GrSLType);
+            varyingHandler->addVarying("Vertex1", &v1, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x, %s.y);", 
+                                   v1.vsOut(),
+                                   te.inVertex1()->fName, 
+                                   te.inVertex1()->fName);
+
+            GrGLSLVertToFrag v2(kVec2f_GrSLType);
+            varyingHandler->addVarying("Vertex2", &v2, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x, %s.y);", 
+                                   v2.vsOut(),
+                                   te.inVertex2()->fName, 
+                                   te.inVertex2()->fName);
+
+            GrGLSLVertToFrag v3(kVec2f_GrSLType);
+            varyingHandler->addVarying("Vertex3", &v3, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x, %s.y);", 
+                                   v3.vsOut(),
+                                   te.inVertex3()->fName, 
+                                   te.inVertex3()->fName);
+
+            GrGLSLVertToFrag delta1(kVec2f_GrSLType);
+            varyingHandler->addVarying("delta1", &delta1, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x - %s.x, %s.y - %s.y) * 0.5;", 
+                                   delta1.vsOut(), v1.vsOut(), v2.vsOut(), v2.vsOut(), v1.vsOut());
+
+            GrGLSLVertToFrag delta2(kVec2f_GrSLType);
+            varyingHandler->addVarying("delta2", &delta2, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x - %s.x, %s.y - %s.y) * 0.5;", 
+                                   delta2.vsOut(), v2.vsOut(), v3.vsOut(), v3.vsOut(), v2.vsOut());
+
+            GrGLSLVertToFrag delta3(kVec2f_GrSLType);
+            varyingHandler->addVarying("delta3", &delta3, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x - %s.x, %s.y - %s.y) * 0.5;", 
+                                   delta3.vsOut(), v3.vsOut(), v1.vsOut(), v1.vsOut(), v3.vsOut());
+
+            GrGLSLVertToFrag windings(kInt_GrSLType);
+            varyingHandler->addVarying("windings", &windings, kLow_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = %s;", 
+                                   windings.vsOut(), te.inWindings()->fName);
+
+            // emit transforms
+            this->emitTransforms(vsBuilder, varyingHandler, uniformHandler, gpArgs->fPositionVar, 
+                                 te.inPosition()->fName, te.localMatrix(), args.fTransformsIn, 
+                                 args.fTransformsOut);
+
+            GrGLSLFragmentBuilder* fsBuilder = args.fFragBuilder;
+            SkAssertResult(fsBuilder->enableFeature(
+                           GrGLSLFragmentShaderBuilder::kPixelLocalStorage_GLSLFeature));
+            SkAssertResult(fsBuilder->enableFeature(
+                    GrGLSLFragmentShaderBuilder::kStandardDerivatives_GLSLFeature));
+            fsBuilder->declAppendf(GR_GL_PLS_PATH_DATA_DECL);
+            // Compute four subsamples, each shifted a quarter pixel along x and y from 
+            // gl_FragCoord. The oriented box positioning of the subsamples is of course not 
+            // optimal, but it greatly simplifies the math and this simplification is necessary for
+            // performance reasons.
+            fsBuilder->codeAppendf("highp vec2 firstSample = %s.xy - vec2(0.25);", 
+                                   fsBuilder->fragmentPosition());
+            fsBuilder->codeAppendf("highp vec2 delta1 = %s;", delta1.fsIn());
+            fsBuilder->codeAppendf("highp vec2 delta2 = %s;", delta2.fsIn());
+            fsBuilder->codeAppendf("highp vec2 delta3 = %s;", delta3.fsIn());
+            // Check whether first sample is inside the triangle by computing three dot products. If
+            // all are < 0, we're inside. The first vector in each case is half of what it is
+            // "supposed" to be, because we re-use them later as adjustment factors for which half
+            // is the correct value, so we multiply the dots by two to compensate.
+            fsBuilder->codeAppendf("highp float d1 = dot(delta1, (firstSample - %s).yx) * 2.0;", 
+                                   v1.fsIn());
+            fsBuilder->codeAppendf("highp float d2 = dot(delta2, (firstSample - %s).yx) * 2.0;", 
+                                   v2.fsIn());
+            fsBuilder->codeAppendf("highp float d3 = dot(delta3, (firstSample - %s).yx) * 2.0;", 
+                                   v3.fsIn());
+            fsBuilder->codeAppend("highp float dmax = max(d1, max(d2, d3));");
+            fsBuilder->codeAppendf("pls.windings[0] += (dmax <= 0.0) ? %s : 0;", windings.fsIn());
+            // for subsequent samples, we don't recalculate the entire dot product -- just adjust it
+            // to the value it would have if we did recompute it.
+            fsBuilder->codeAppend("d1 += delta1.x;");
+            fsBuilder->codeAppend("d2 += delta2.x;");
+            fsBuilder->codeAppend("d3 += delta3.x;");
+            fsBuilder->codeAppend("dmax = max(d1, max(d2, d3));");
+            fsBuilder->codeAppendf("pls.windings[1] += (dmax <= 0.0) ? %s : 0;", windings.fsIn());
+            fsBuilder->codeAppend("d1 += delta1.y;");
+            fsBuilder->codeAppend("d2 += delta2.y;");
+            fsBuilder->codeAppend("d3 += delta3.y;");
+            fsBuilder->codeAppend("dmax = max(d1, max(d2, d3));");
+            fsBuilder->codeAppendf("pls.windings[2] += (dmax <= 0.0) ? %s : 0;", windings.fsIn());
+            fsBuilder->codeAppend("d1 -= delta1.x;");
+            fsBuilder->codeAppend("d2 -= delta2.x;");
+            fsBuilder->codeAppend("d3 -= delta3.x;");
+            fsBuilder->codeAppend("dmax = max(d1, max(d2, d3));");
+            fsBuilder->codeAppendf("pls.windings[3] += (dmax <= 0.0) ? %s : 0;", windings.fsIn());
+        }
+
+        // Builds the program key for this processor. Bit 0 is set when local coords are in use
+        // and the local matrix has perspective; all other bits are zero.
+        //   gp - the geometry processor being keyed; cast to PLSAATriangleEffect
+        //   b  - key builder that receives the single 32-bit key word
+        static inline void GenKey(const GrGeometryProcessor& gp,
+                                  const GrGLSLCaps&,
+                                  GrProcessorKeyBuilder* b) {
+            const PLSAATriangleEffect& te = gp.cast<PLSAATriangleEffect>();
+            // Only consult the local matrix when local coords are used, matching the GenKey of
+            // PLSQuadEdgeEffect and PLSFinishEffect. PLSPathBatch::onPrepareDraws only computes
+            // the matrix in that case, so its contents are not meaningful otherwise.
+            uint32_t key = 0;
+            key |= te.fUsesLocalCoords && te.localMatrix().hasPerspective() ? 0x1 : 0x0;
+            b->add32(key);
+        }
+
+        // Intentionally empty: this override uploads nothing.
+        void setData(const GrGLSLProgramDataManager& pdman,
+                     const GrPrimitiveProcessor& gp) override {}
+
+        // Forwards to the shared helper, instantiated for PLSAATriangleEffect.
+        void setTransformData(const GrPrimitiveProcessor& primProc,
+                              const GrGLSLProgramDataManager& pdman,
+                              int index,
+                              const SkTArray<const GrCoordTransform*, true>& transforms) override {
+            typedef PLSAATriangleEffect Effect;
+            this->setTransformDataHelper<Effect>(primProc, pdman, index, transforms);
+        }
+
+    private:
+        typedef GrGLSLGeometryProcessor INHERITED;
+    };
+
+    // Appends this processor's key to b by delegating to GLSLProcessor::GenKey.
+    void getGLSLProcessorKey(const GrGLSLCaps& caps, GrProcessorKeyBuilder* b) const override {
+        GLSLProcessor::GenKey(*this, caps, b);
+    }
+
+    // Returns a newly allocated GLSLProcessor for this effect.
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrGLSLCaps&) const override {
+        GrGLSLPrimitiveProcessor* instance = new GLSLProcessor(*this);
+        return instance;
+    }
+
+private:
+    // Stores the local matrix / local-coords flag, then registers the vertex attributes in
+    // order: position, the three inVertex attributes, and the winding value. Finally flags that
+    // the fragment position will be read.
+    PLSAATriangleEffect(const SkMatrix& localMatrix, bool usesLocalCoords)
+        : fLocalMatrix(localMatrix)
+        , fUsesLocalCoords(usesLocalCoords) {
+        this->initClassID<PLSAATriangleEffect>();
+
+        const Attribute position("inPosition", kVec2f_GrVertexAttribType, kHigh_GrSLPrecision);
+        fInPosition = &this->addVertexAttrib(position);
+
+        const Attribute vertex1("inVertex1", kVec2f_GrVertexAttribType, kHigh_GrSLPrecision);
+        fInVertex1 = &this->addVertexAttrib(vertex1);
+
+        const Attribute vertex2("inVertex2", kVec2f_GrVertexAttribType, kHigh_GrSLPrecision);
+        fInVertex2 = &this->addVertexAttrib(vertex2);
+
+        const Attribute vertex3("inVertex3", kVec2f_GrVertexAttribType, kHigh_GrSLPrecision);
+        fInVertex3 = &this->addVertexAttrib(vertex3);
+
+        const Attribute windings("inWindings", kInt_GrVertexAttribType, kLow_GrSLPrecision);
+        fInWindings = &this->addVertexAttrib(windings);
+
+        this->setWillReadFragmentPosition();
+    }
+
+    const Attribute* fInPosition;
+    const Attribute* fInVertex1;
+    const Attribute* fInVertex2;
+    const Attribute* fInVertex3;
+    const Attribute* fInWindings;
+    SkMatrix         fLocalMatrix;
+    bool             fUsesLocalCoords;
+    
+    GR_DECLARE_GEOMETRY_PROCESSOR_TEST;
+
+    typedef GrGeometryProcessor INHERITED;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Geometry processor for the quadratic portions of a PLS path. The quadratic is specified by
+ * 0=u^2-v canonical coords; u and v are the two components of the inUV vertex attribute.
+ * For each of four subsamples per pixel, the fragment shader adds the vertex's winding value to
+ * the matching pls.windings entry when the subsample satisfies u^2 <= v and the edge test against
+ * the endpoint1/endpoint2 line is non-negative.
+ * Requires shader derivative instruction support.
+ *
+ * NOTE(review): an earlier revision of this comment described signed-distance coverage and
+ * trimming via the third/fourth components of the vertex coord. inUV is a vec2 here and no such
+ * trimming appears in this shader.
+ */
+
+class PLSQuadEdgeEffect : public GrPLSGeometryProcessor {
+public:
+
+    // Returns a newly allocated PLSQuadEdgeEffect.
+    //   localMatrix     - matrix handed to emitTransforms / setTransformDataHelper
+    //   usesLocalCoords - whether local coords are in use; consulted by GenKey
+    static GrPLSGeometryProcessor* Create(const SkMatrix& localMatrix,
+                                          bool usesLocalCoords) {
+        return new PLSQuadEdgeEffect(localMatrix, usesLocalCoords);
+    }
+
+    virtual ~PLSQuadEdgeEffect() {}
+
+    const char* name() const override { return "PLSQuadEdge"; }
+
+    // Accessors for the vertex attributes registered by the constructor and for the
+    // constructor arguments.
+    const Attribute* inPosition() const { return fInPosition; }
+    const Attribute* inUV() const { return fInUV; }
+    const Attribute* inEndpoint1() const { return fInEndpoint1; }
+    const Attribute* inEndpoint2() const { return fInEndpoint2; }
+    const Attribute* inWindings() const { return fInWindings; }
+    const SkMatrix& localMatrix() const { return fLocalMatrix; }
+    bool usesLocalCoords() const { return fUsesLocalCoords; }
+
+    // Emits the vertex and fragment shader code for PLSQuadEdgeEffect.
+    class GLSLProcessor : public GrGLSLGeometryProcessor {
+    public:
+        GLSLProcessor(const GrGeometryProcessor&) {}
+
+        // Emits the shader code. The vertex shader forwards uv, both endpoints, a half-scaled
+        // edge vector ("delta") and the winding value as varyings. The fragment shader
+        // evaluates four subsamples and accumulates into pls.windings[0..3].
+        void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
+            const PLSQuadEdgeEffect& qe = args.fGP.cast<PLSQuadEdgeEffect>();
+            GrGLSLVertexBuilder* vsBuilder = args.fVertBuilder;
+            GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+            GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
+
+            // emit attributes
+            varyingHandler->emitAttributes(qe);
+
+            // Canonical quad coordinates, passed straight through from inUV.
+            GrGLSLVertToFrag uv(kVec2f_GrSLType);
+            varyingHandler->addVarying("uv", &uv, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = %s;", uv.vsOut(), qe.inUV()->fName);
+
+            // The two endpoint attributes, passed through unchanged.
+            GrGLSLVertToFrag ep1(kVec2f_GrSLType);
+            varyingHandler->addVarying("endpoint1", &ep1, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x, %s.y);", ep1.vsOut(), 
+                                  qe.inEndpoint1()->fName, qe.inEndpoint1()->fName);
+
+            GrGLSLVertToFrag ep2(kVec2f_GrSLType);
+            varyingHandler->addVarying("endpoint2", &ep2, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x, %s.y);", ep2.vsOut(), 
+                                  qe.inEndpoint2()->fName, qe.inEndpoint2()->fName);
+
+            // delta = (ep1.x - ep2.x, ep2.y - ep1.y) * 0.5. The fragment shader dots it with
+            // the swizzled (.yx) offset from ep1 and uses its components as per-subsample
+            // increments.
+            GrGLSLVertToFrag delta(kVec2f_GrSLType);
+            varyingHandler->addVarying("delta", &delta, kHigh_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = vec2(%s.x - %s.x, %s.y - %s.y) * 0.5;", 
+                                   delta.vsOut(), ep1.vsOut(), ep2.vsOut(), ep2.vsOut(), 
+                                   ep1.vsOut());
+
+            // Integer winding value added to each covered subsample.
+            GrGLSLVertToFrag windings(kInt_GrSLType);
+            varyingHandler->addVarying("windings", &windings, kLow_GrSLPrecision);
+            vsBuilder->codeAppendf("%s = %s;", 
+                                   windings.vsOut(), qe.inWindings()->fName);
+
+            // Setup position
+            this->setupPosition(vsBuilder, gpArgs, qe.inPosition()->fName);
+
+            // emit transforms
+            this->emitTransforms(vsBuilder, varyingHandler, uniformHandler, gpArgs->fPositionVar, 
+                                 qe.inPosition()->fName, qe.localMatrix(), args.fTransformsIn, 
+                                 args.fTransformsOut);
+
+            GrGLSLFragmentBuilder* fsBuilder = args.fFragBuilder;
+            SkAssertResult(fsBuilder->enableFeature(
+                           GrGLSLFragmentShaderBuilder::kPixelLocalStorage_GLSLFeature));
+            SkAssertResult(fsBuilder->enableFeature(
+                    GrGLSLFragmentShaderBuilder::kStandardDerivatives_GLSLFeature));
+            // Helper shader function in_quad(dot, uv): true only when uv.x * uv.x <= uv.y and
+            // dot >= 0.0.
+            static const int QUAD_ARGS = 2;
+            GrGLSLShaderVar inQuadArgs[QUAD_ARGS] = {
+                GrGLSLShaderVar("dot", kFloat_GrSLType, 0, kHigh_GrSLPrecision),
+                GrGLSLShaderVar("uv", kVec2f_GrSLType, 0, kHigh_GrSLPrecision)
+            };
+            SkString inQuadName;
+
+            const char* inQuadCode = "if (uv.x * uv.x <= uv.y) {"
+                                     "return dot >= 0.0;"
+                                     "} else {"
+                                     "return false;"
+                                     "}";
+            fsBuilder->emitFunction(kBool_GrSLType, "in_quad", QUAD_ARGS, inQuadArgs, inQuadCode, 
+                                    &inQuadName);
+            fsBuilder->declAppendf(GR_GL_PLS_PATH_DATA_DECL);
+            // keep the derivative instructions outside the conditional
+            fsBuilder->codeAppendf("highp vec2 uvdX = dFdx(%s);", uv.fsIn());
+            fsBuilder->codeAppendf("highp vec2 uvdY = dFdy(%s);", uv.fsIn());
+            // Steps applied to uv when moving between subsamples, built from its derivatives.
+            fsBuilder->codeAppend("highp vec2 uvIncX = uvdX * 0.45 + uvdY * -0.1;");
+            fsBuilder->codeAppend("highp vec2 uvIncY = uvdX * 0.1 + uvdY * 0.55;");
+            // uv for the first subsample.
+            fsBuilder->codeAppendf("highp vec2 uv = %s.xy - uvdX * 0.35 - uvdY * 0.25;", 
+                                   uv.fsIn());
+            // First subsample: a quarter pixel below the fragment position in x and y.
+            fsBuilder->codeAppendf("highp vec2 firstSample = %s.xy - vec2(0.25);",
+                                   fsBuilder->fragmentPosition());
+            // Edge test value for the first subsample. delta is half-scaled, hence the * 2.0.
+            fsBuilder->codeAppendf("highp float d = dot(%s, (firstSample - %s).yx) * 2.0;", 
+                                   delta.fsIn(), ep1.fsIn());
+            fsBuilder->codeAppendf("pls.windings[0] += %s(d, uv) ? %s : 0;", inQuadName.c_str(), 
+                                   windings.fsIn());
+            // Remaining subsamples: d and uv are adjusted incrementally (+x, +y, -x) rather
+            // than recomputed.
+            fsBuilder->codeAppend("uv += uvIncX;");
+            fsBuilder->codeAppendf("d += %s.x;", delta.fsIn());
+            fsBuilder->codeAppendf("pls.windings[1] += %s(d, uv) ? %s : 0;", inQuadName.c_str(), 
+                                   windings.fsIn());
+            fsBuilder->codeAppend("uv += uvIncY;");
+            fsBuilder->codeAppendf("d += %s.y;", delta.fsIn());
+            fsBuilder->codeAppendf("pls.windings[2] += %s(d, uv) ? %s : 0;", inQuadName.c_str(), 
+                                   windings.fsIn());
+            fsBuilder->codeAppend("uv -= uvIncX;");
+            fsBuilder->codeAppendf("d -= %s.x;", delta.fsIn());
+            fsBuilder->codeAppendf("pls.windings[3] += %s(d, uv) ? %s : 0;", inQuadName.c_str(), 
+                                   windings.fsIn());
+        }
+
+        // Builds the program key: bit 0 is set when local coords are used and the local matrix
+        // has perspective.
+        static inline void GenKey(const GrGeometryProcessor& gp,
+                                  const GrGLSLCaps&,
+                                  GrProcessorKeyBuilder* b) {
+            const PLSQuadEdgeEffect& qee = gp.cast<PLSQuadEdgeEffect>();
+            uint32_t key = 0;
+            key |= qee.usesLocalCoords() && qee.localMatrix().hasPerspective() ? 0x1 : 0x0;
+            b->add32(key);
+        }
+
+        // Intentionally empty: this override uploads nothing.
+        virtual void setData(const GrGLSLProgramDataManager& pdman,
+                             const GrPrimitiveProcessor& gp) override {
+        }
+
+        // Forwards to the shared helper, instantiated for PLSQuadEdgeEffect.
+        void setTransformData(const GrPrimitiveProcessor& primProc,
+                              const GrGLSLProgramDataManager& pdman,
+                              int index,
+                              const SkTArray<const GrCoordTransform*, true>& transforms) override {
+            this->setTransformDataHelper<PLSQuadEdgeEffect>(primProc, pdman, index, transforms);
+        }
+
+    private:
+        typedef GrGLSLGeometryProcessor INHERITED;
+    };
+
+    // Appends this processor's key to b by delegating to GLSLProcessor::GenKey.
+    virtual void getGLSLProcessorKey(const GrGLSLCaps& caps,
+                                   GrProcessorKeyBuilder* b) const override {
+        GLSLProcessor::GenKey(*this, caps, b);
+    }
+
+    // Returns a newly allocated GLSLProcessor for this effect.
+    virtual GrGLSLPrimitiveProcessor* createGLSLInstance(const GrGLSLCaps&) const override {
+        return new GLSLProcessor(*this);
+    }
+
+private:
+    // Registers the vertex attributes in order (position, uv, endpoint1, endpoint2, windings)
+    // and flags that the fragment position will be read.
+    PLSQuadEdgeEffect(const SkMatrix& localMatrix, bool usesLocalCoords)
+        : fLocalMatrix(localMatrix)
+        , fUsesLocalCoords(usesLocalCoords) {
+        this->initClassID<PLSQuadEdgeEffect>();
+        fInPosition = &this->addVertexAttrib(Attribute("inPosition", kVec2f_GrVertexAttribType, 
+                                                       kHigh_GrSLPrecision));
+        fInUV = &this->addVertexAttrib(Attribute("inUV", kVec2f_GrVertexAttribType, 
+                                                 kHigh_GrSLPrecision));
+        fInEndpoint1 = &this->addVertexAttrib(Attribute("inEndpoint1", kVec2f_GrVertexAttribType, 
+                                                        kHigh_GrSLPrecision));
+        fInEndpoint2 = &this->addVertexAttrib(Attribute("inEndpoint2", kVec2f_GrVertexAttribType, 
+                                                        kHigh_GrSLPrecision));
+        fInWindings  = &this->addVertexAttrib(Attribute("inWindings", kInt_GrVertexAttribType, 
+                                                        kLow_GrSLPrecision));
+        this->setWillReadFragmentPosition();
+    }
+
+    // Pointers to the attributes returned by addVertexAttrib in the constructor.
+    const Attribute* fInPosition;
+    const Attribute* fInUV;
+    const Attribute* fInEndpoint1;
+    const Attribute* fInEndpoint2;
+    const Attribute* fInWindings;
+    SkMatrix         fLocalMatrix;
+    bool             fUsesLocalCoords;
+    
+    GR_DECLARE_GEOMETRY_PROCESSOR_TEST;
+
+    typedef GrGeometryProcessor INHERITED;
+};
+
+// Geometry processor for the final pass of a PLS path draw. Its fragment shader reads the four
+// winding counts in pls.windings and turns them into a coverage value, each sample contributing
+// 0.25. It reports the kFinish pixel-local-storage state and overrides the destination colour
+// name with GR_GL_PLS_DSTCOLOR_NAME.
+class PLSFinishEffect : public GrGeometryProcessor {
+public:
+
+    // Returns a newly allocated PLSFinishEffect.
+    //   color           - colour uploaded as a uniform; GrColor_ILLEGAL means "ignored"
+    //   useEvenOdd      - selects the even-odd rule instead of the non-zero rule in the shader
+    //   localMatrix     - matrix handed to emitTransforms / setTransformDataHelper
+    //   usesLocalCoords - whether local coords are in use; consulted by GenKey
+    static GrGeometryProcessor* Create(GrColor color, bool useEvenOdd, const SkMatrix& localMatrix,
+                                       bool usesLocalCoords) {
+        return new PLSFinishEffect(color, useEvenOdd, localMatrix, usesLocalCoords);
+    }
+
+    virtual ~PLSFinishEffect() {}
+
+    const char* name() const override { return "PLSFinish"; }
+
+    const Attribute* inPosition() const { return fInPosition; }
+    GrColor color() const { return fColor; }
+    bool colorIgnored() const { return GrColor_ILLEGAL == fColor; }
+    const SkMatrix& localMatrix() const { return fLocalMatrix; }
+    bool usesLocalCoords() const { return fUsesLocalCoords; }
+
+    GrPixelLocalStorageState getPixelLocalStorageState() const override {
+        return GrPixelLocalStorageState::kFinish_GrPixelLocalStorageState;
+    }
+
+    const char* getDestColorOverride() const override {
+        return GR_GL_PLS_DSTCOLOR_NAME;
+    }
+
+    // Emits the shader code for PLSFinishEffect and uploads its uniforms.
+    class GLSLProcessor : public GrGLSLGeometryProcessor {
+    public:
+        // fColor caches the last colour uploaded by setData. It must start at GrColor_ILLEGAL:
+        // setData compares against it, and an indeterminate initial value could happen to match
+        // the effect's colour and suppress the first upload.
+        GLSLProcessor(const GrGeometryProcessor&) : fColor(GrColor_ILLEGAL) {}
+
+        // Emits the vertex setup and a fragment shader that converts pls.windings[0..3] into
+        // coverage, using even-odd or non-zero depending on the "useEvenOdd" uniform.
+        void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
+            const PLSFinishEffect& fe = args.fGP.cast<PLSFinishEffect>();
+            GrGLSLVertexBuilder* vsBuilder = args.fVertBuilder;
+            GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+            GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
+
+            // Float uniform acting as a boolean; the shader tests it against 0.0.
+            fUseEvenOdd = uniformHandler->addUniform(GrGLUniformHandler::kFragment_Visibility,
+                                                    kFloat_GrSLType, kLow_GrSLPrecision,
+                                                    "useEvenOdd");
+            const char* useEvenOdd = uniformHandler->getUniformCStr(fUseEvenOdd);
+
+            varyingHandler->emitAttributes(fe);
+            this->setupPosition(vsBuilder, gpArgs, fe.inPosition()->fName);
+            this->emitTransforms(vsBuilder, varyingHandler, uniformHandler, gpArgs->fPositionVar,
+                                 fe.inPosition()->fName, fe.localMatrix(), args.fTransformsIn,
+                                 args.fTransformsOut);
+
+            GrGLSLFragmentBuilder* fsBuilder = args.fFragBuilder;
+            SkAssertResult(fsBuilder->enableFeature(
+                           GrGLSLFragmentShaderBuilder::kPixelLocalStorage_GLSLFeature));
+            fsBuilder->declAppendf(GR_GL_PLS_PATH_DATA_DECL);
+            fsBuilder->codeAppend("float coverage;");
+            fsBuilder->codeAppendf("if (%s != 0.0) {", useEvenOdd);
+            // Even-odd: a sample is covered when its winding count is odd.
+            fsBuilder->codeAppend("coverage = float(abs(pls.windings[0]) % 2) * 0.25;");
+            fsBuilder->codeAppend("coverage += float(abs(pls.windings[1]) % 2) * 0.25;");
+            fsBuilder->codeAppend("coverage += float(abs(pls.windings[2]) % 2) * 0.25;");
+            fsBuilder->codeAppend("coverage += float(abs(pls.windings[3]) % 2) * 0.25;");
+            fsBuilder->codeAppend("} else {");
+            // Non-zero: a sample is covered when its winding count is not zero.
+            fsBuilder->codeAppend("coverage = pls.windings[0] != 0 ? 0.25 : 0.0;");
+            fsBuilder->codeAppend("coverage += pls.windings[1] != 0 ? 0.25 : 0.0;");
+            fsBuilder->codeAppend("coverage += pls.windings[2] != 0 ? 0.25 : 0.0;");
+            fsBuilder->codeAppend("coverage += pls.windings[3] != 0 ? 0.25 : 0.0;");
+            fsBuilder->codeAppend("}");
+            if (!fe.colorIgnored()) {
+                this->setupUniformColor(fsBuilder, uniformHandler, args.fOutputColor,
+                                        &fColorUniform);
+            }
+            fsBuilder->codeAppendf("%s = vec4(coverage);", args.fOutputCoverage);
+            // NOTE(review): this assigns a constant to the output colour after
+            // setupUniformColor has been given the same output name - confirm this is intended.
+            fsBuilder->codeAppendf("%s = vec4(1.0, 0.0, 1.0, 1.0);", args.fOutputColor);
+        }
+
+        // Builds the program key: bit 0 is set when local coords are used and the local matrix
+        // has perspective.
+        static inline void GenKey(const GrGeometryProcessor& gp,
+                                  const GrGLSLCaps&,
+                                  GrProcessorKeyBuilder* b) {
+            const PLSFinishEffect& fe = gp.cast<PLSFinishEffect>();
+            uint32_t key = 0;
+            key |= fe.usesLocalCoords() && fe.localMatrix().hasPerspective() ? 0x1 : 0x0;
+            b->add32(key);
+        }
+
+        // Uploads the even-odd flag on every call. The colour uniform is uploaded only when
+        // the colour is not ignored and differs from the cached fColor.
+        virtual void setData(const GrGLSLProgramDataManager& pdman,
+                             const GrPrimitiveProcessor& gp) override {
+            const PLSFinishEffect& fe = gp.cast<PLSFinishEffect>();
+            pdman.set1f(fUseEvenOdd, fe.fUseEvenOdd);
+            if (fe.color() != fColor && !fe.colorIgnored()) {
+                GrGLfloat c[4];
+                GrColorToRGBAFloat(fe.color(), c);
+                pdman.set4fv(fColorUniform, 1, c);
+                fColor = fe.color();
+            }
+        }
+
+        // Forwards to the shared helper, instantiated for PLSFinishEffect.
+        void setTransformData(const GrPrimitiveProcessor& primProc,
+                              const GrGLSLProgramDataManager& pdman,
+                              int index,
+                              const SkTArray<const GrCoordTransform*, true>& transforms) override {
+            this->setTransformDataHelper<PLSFinishEffect>(primProc, pdman, index, transforms);
+        }
+
+    private:
+        GrColor fColor;               // last colour uploaded; GrColor_ILLEGAL until then
+        UniformHandle fColorUniform;
+        UniformHandle fUseEvenOdd;
+
+        typedef GrGLSLGeometryProcessor INHERITED;
+    };
+
+    // Appends this processor's key to b by delegating to GLSLProcessor::GenKey.
+    virtual void getGLSLProcessorKey(const GrGLSLCaps& caps,
+                                   GrProcessorKeyBuilder* b) const override {
+        GLSLProcessor::GenKey(*this, caps, b);
+    }
+
+    // Returns a newly allocated GLSLProcessor for this effect.
+    virtual GrGLSLPrimitiveProcessor* createGLSLInstance(const GrGLSLCaps&) const override {
+        return new GLSLProcessor(*this);
+    }
+
+private:
+    // Stores the arguments and registers the single "inPosition" vertex attribute.
+    PLSFinishEffect(GrColor color, bool useEvenOdd, const SkMatrix& localMatrix,
+                    bool usesLocalCoords)
+        : fColor(color)
+        , fUseEvenOdd(useEvenOdd)
+        , fLocalMatrix(localMatrix)
+        , fUsesLocalCoords(usesLocalCoords) {
+        this->initClassID<PLSFinishEffect>();
+        fInPosition = &this->addVertexAttrib(Attribute("inPosition", kVec2f_GrVertexAttribType,
+                                                       kHigh_GrSLPrecision));
+    }
+
+    const Attribute* fInPosition;
+    GrColor          fColor;
+    bool             fUseEvenOdd;
+    SkMatrix         fLocalMatrix;
+    bool             fUsesLocalCoords;
+
+    typedef GrGeometryProcessor INHERITED;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Reports whether this renderer accepts the path: it needs shader derivative support,
+// antialiasing, a fill-style stroke, and a non-inverse winding fill.
+bool GrPLSPathRenderer::onCanDrawPath(const CanDrawPathArgs& args) const {
+    if (!args.fShaderCaps->shaderDerivativeSupport()) {
+        return false;
+    }
+    if (!args.fAntiAlias) {
+        return false;
+    }
+    if (!args.fStroke->isFillStyle()) {
+        return false;
+    }
+    if (args.fPath->isInverseFillType()) {
+        return false;
+    }
+    // We have support for even-odd rendering, but are having some troublesome
+    // seams. Disable in the presence of even-odd for now.
+    return args.fPath->getFillType() == SkPath::FillType::kWinding_FillType;
+}
+
+// Vertex batch that draws one path through pixel local storage. For each geometry it issues up
+// to three draws: triangles (PLSAATriangleEffect), quads (PLSQuadEdgeEffect), and a rectangle
+// covering the rounded-out device bounds (PLSFinishEffect). Batches are never combined.
+class PLSPathBatch : public GrVertexBatch {
+public:
+    DEFINE_BATCH_CLASS_ID
+    // Per-path inputs: the draw colour, the view matrix, and the path itself.
+    struct Geometry {
+        GrColor fColor;
+        SkMatrix fViewMatrix;
+        SkPath fPath;
+    };
+
+    // Returns a newly allocated batch holding a copy of geometry.
+    static GrDrawBatch* Create(const Geometry& geometry) {
+        return new PLSPathBatch(geometry);
+    }
+
+    const char* name() const override { return "PLSBatch"; }
+
+    // Reports a known four-component colour and unknown single-component coverage, and
+    // requests the PLS destination read.
+    void computePipelineOptimizations(GrInitInvariantOutput* color,
+                                      GrInitInvariantOutput* coverage,
+                                      GrBatchToXPOverrides* overrides) const override {
+        // When this is called on a batch, there is only one geometry bundle
+        color->setKnownFourComponents(fGeoData[0].fColor);
+        coverage->setUnknownSingleComponent();
+        overrides->fUsePLSDstRead = true;
+    }
+
+    // Applies colour overrides to the first geometry and records the batch-wide flags.
+    void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
+        // Handle any color overrides
+        if (!overrides.readsColor()) {
+            fGeoData[0].fColor = GrColor_ILLEGAL;
+        }
+        overrides.getOverrideColorIfSet(&fGeoData[0].fColor);
+
+        // setup batch properties
+        fBatch.fColorIgnored = !overrides.readsColor();
+        fBatch.fColor = fGeoData[0].fColor;
+        fBatch.fUsesLocalCoords = overrides.readsLocalCoords();
+        fBatch.fCoverageIgnored = !overrides.readsCoverage();
+        fBatch.fCanTweakAlphaForCoverage = overrides.canTweakAlphaForCoverage();
+    }
+
+    // Generates the vertex data and issues the draws for every geometry. Returns early,
+    // without issuing further draws, if the view matrix cannot be inverted when local coords
+    // are needed, or if a vertex allocation fails. A geometry for which get_geometry fails is
+    // skipped.
+    void onPrepareDraws(Target* target) const override {
+        int instanceCount = fGeoData.count();
+
+        // Start from identity so the matrix copied into the processors below is well defined
+        // even when local coords are unused and the inversion is skipped.
+        SkMatrix invert = SkMatrix::I();
+        if (this->usesLocalCoords() && !this->viewMatrix().invert(&invert)) {
+            SkDebugf("Could not invert viewmatrix\n");
+            return;
+        }
+
+        // Setup GrGeometryProcessors
+        SkAutoTUnref<GrPLSGeometryProcessor> triangleProcessor(
+                PLSAATriangleEffect::Create(invert, this->usesLocalCoords()));
+        SkAutoTUnref<GrPLSGeometryProcessor> quadProcessor(
+                PLSQuadEdgeEffect::Create(invert, this->usesLocalCoords()));
+
+        GrResourceProvider* rp = target->resourceProvider();
+        for (int i = 0; i < instanceCount; ++i) {
+            const Geometry& args = fGeoData[i];
+            // Device-space bounds, rounded outward to whole pixels.
+            SkRect bounds = args.fPath.getBounds();
+            args.fViewMatrix.mapRect(&bounds);
+            bounds.fLeft = SkScalarFloorToScalar(bounds.fLeft);
+            bounds.fTop = SkScalarFloorToScalar(bounds.fTop);
+            bounds.fRight = SkScalarCeilToScalar(bounds.fRight);
+            bounds.fBottom = SkScalarCeilToScalar(bounds.fBottom);
+            triangleProcessor->setBounds(bounds);
+            quadProcessor->setBounds(bounds);
+
+            // We use the fact that SkPath::transform path does subdivision based on
+            // perspective. Otherwise, we apply the view matrix when copying to the
+            // segment representation.
+            const SkMatrix* viewMatrix = &args.fViewMatrix;
+
+            // We avoid initializing the path unless we have to
+            const SkPath* pathPtr = &args.fPath;
+            SkTLazy<SkPath> tmpPath;
+            if (viewMatrix->hasPerspective()) {
+                SkPath* tmpPathPtr = tmpPath.init(*pathPtr);
+                tmpPathPtr->setIsVolatile(true);
+                tmpPathPtr->transform(*viewMatrix);
+                viewMatrix = &SkMatrix::I();
+                pathPtr = tmpPathPtr;
+            }
+
+            GrVertices grVertices;
+
+            PLSVertices triVertices;
+            PLSVertices quadVertices;
+            if (!get_geometry(*pathPtr, *viewMatrix, triVertices, quadVertices, rp, bounds)) {
+                continue;
+            }
+
+            // Pass 1: triangles.
+            if (triVertices.count()) {
+                const GrVertexBuffer* triVertexBuffer;
+                int firstTriVertex;
+                size_t triStride = triangleProcessor->getVertexStride();
+                PLSVertex* triVerts = reinterpret_cast<PLSVertex*>(target->makeVertexSpace(
+                        triStride, triVertices.count(), &triVertexBuffer, &firstTriVertex));
+                if (!triVerts) {
+                    SkDebugf("Could not allocate vertices\n");
+                    return;
+                }
+                // Distinct index name: the outer loop already uses i.
+                for (int v = 0; v < triVertices.count(); ++v) {
+                    triVerts[v] = triVertices[v];
+                }
+                grVertices.init(kTriangles_GrPrimitiveType, triVertexBuffer, firstTriVertex,
+                                triVertices.count());
+                target->initDraw(triangleProcessor, this->pipeline());
+                target->draw(grVertices);
+            }
+
+            // Pass 2: quads.
+            if (quadVertices.count()) {
+                const GrVertexBuffer* quadVertexBuffer;
+                int firstQuadVertex;
+                size_t quadStride = quadProcessor->getVertexStride();
+                PLSVertex* quadVerts = reinterpret_cast<PLSVertex*>(target->makeVertexSpace(
+                        quadStride, quadVertices.count(), &quadVertexBuffer, &firstQuadVertex));
+                if (!quadVerts) {
+                    SkDebugf("Could not allocate vertices\n");
+                    return;
+                }
+                for (int v = 0; v < quadVertices.count(); ++v) {
+                    quadVerts[v] = quadVertices[v];
+                }
+                grVertices.init(kTriangles_GrPrimitiveType, quadVertexBuffer, firstQuadVertex,
+                                quadVertices.count());
+                target->initDraw(quadProcessor, this->pipeline());
+                target->draw(grVertices);
+            }
+
+            // Pass 3: finish. Two triangles covering the rounded-out bounds.
+            SkAutoTUnref<GrGeometryProcessor> finishProcessor(
+                    PLSFinishEffect::Create(this->color(),
+                                            pathPtr->getFillType() ==
+                                                                SkPath::FillType::kEvenOdd_FillType,
+                                            invert,
+                                            this->usesLocalCoords()));
+            const GrVertexBuffer* rectVertexBuffer;
+            size_t finishStride = finishProcessor->getVertexStride();
+            int firstRectVertex;
+            static const int kRectVertexCount = 6;
+            SkPoint* rectVerts = reinterpret_cast<SkPoint*>(target->makeVertexSpace(
+                    finishStride, kRectVertexCount, &rectVertexBuffer, &firstRectVertex));
+            if (!rectVerts) {
+                SkDebugf("Could not allocate vertices\n");
+                return;
+            }
+            rectVerts[0] = { bounds.fLeft, bounds.fTop };
+            rectVerts[1] = { bounds.fLeft, bounds.fBottom };
+            rectVerts[2] = { bounds.fRight, bounds.fBottom };
+            rectVerts[3] = { bounds.fLeft, bounds.fTop };
+            rectVerts[4] = { bounds.fRight, bounds.fTop };
+            rectVerts[5] = { bounds.fRight, bounds.fBottom };
+
+            grVertices.init(kTriangles_GrPrimitiveType, rectVertexBuffer, firstRectVertex,
+                            kRectVertexCount);
+            target->initDraw(finishProcessor, this->pipeline());
+            target->draw(grVertices);
+        }
+    }
+
+    SkSTArray<1, Geometry, true>* geoData() { return &fGeoData; }
+
+private:
+    // Stores the geometry and sets fBounds to the path bounds mapped by the view matrix.
+    PLSPathBatch(const Geometry& geometry) : INHERITED(ClassID()) {
+        fGeoData.push_back(geometry);
+
+        // compute bounds
+        fBounds = geometry.fPath.getBounds();
+        geometry.fViewMatrix.mapRect(&fBounds);
+    }
+
+    // PLS batches are never merged with other batches.
+    bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
+        return false;
+    }
+
+    GrColor color() const { return fBatch.fColor; }
+    bool usesLocalCoords() const { return fBatch.fUsesLocalCoords; }
+    bool canTweakAlphaForCoverage() const { return fBatch.fCanTweakAlphaForCoverage; }
+    const SkMatrix& viewMatrix() const { return fGeoData[0].fViewMatrix; }
+    bool coverageIgnored() const { return fBatch.fCoverageIgnored; }
+
+    // Batch-wide state filled in by initBatchTracker.
+    struct BatchTracker {
+        GrColor fColor;
+        bool fUsesLocalCoords;
+        bool fColorIgnored;
+        bool fCoverageIgnored;
+        bool fCanTweakAlphaForCoverage;
+    };
+
+    BatchTracker fBatch;
+    SkSTArray<1, Geometry, true> fGeoData;
+
+    typedef GrVertexBatch INHERITED;
+};
+
+// Debug-only flag used to assert that onDrawPath is not entered while already in progress.
+SkDEBUGCODE(bool inPLSDraw = false;)
+
+// Wraps the path, view matrix and colour in a PLSPathBatch and hands it to the draw target.
+// An empty path draws nothing. Always returns true.
+bool GrPLSPathRenderer::onDrawPath(const DrawPathArgs& args) {
+    const SkPath& path = *args.fPath;
+    if (path.isEmpty()) {
+        return true;
+    }
+    SkASSERT(!inPLSDraw);
+    SkDEBUGCODE(inPLSDraw = true;)
+
+    PLSPathBatch::Geometry geo;
+    geo.fPath = path;
+    geo.fViewMatrix = *args.fViewMatrix;
+    geo.fColor = args.fColor;
+
+    SkAutoTUnref<GrDrawBatch> plsBatch(PLSPathBatch::Create(geo));
+    args.fTarget->drawBatch(*args.fPipelineBuilder, plsBatch);
+
+    SkDEBUGCODE(inPLSDraw = false;)
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef GR_TEST_UTILS
+
+// Test factory: builds a PLSPathBatch from GrRandomColor, GrTest::TestMatrixInvertible and
+// GrTest::TestPathConvex, drawn from `random` in that order.
+DRAW_BATCH_TEST_DEFINE(PLSPathBatch) {
+    PLSPathBatch::Geometry randomGeo;
+    randomGeo.fColor = GrRandomColor(random);
+    randomGeo.fViewMatrix = GrTest::TestMatrixInvertible(random);
+    randomGeo.fPath = GrTest::TestPathConvex(random);
+    return PLSPathBatch::Create(randomGeo);
+}
+
+#endif
diff --git a/src/gpu/batches/GrPLSPathRenderer.h b/src/gpu/batches/GrPLSPathRenderer.h
new file mode 100644
index 0000000..d701f62
--- /dev/null
+++ b/src/gpu/batches/GrPLSPathRenderer.h
@@ -0,0 +1,49 @@
+
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrPLSPathRenderer_DEFINED
+#define GrPLSPathRenderer_DEFINED
+
+#include "GrPathRenderer.h"
+
+/*
+ * Renders arbitrary antialiased paths using pixel local storage as a scratch buffer. The overall
+ * technique is very similar to the approach presented in "Resolution independent rendering of 
+ * deformable vector objects using graphics hardware" by Kokojima et al.
+ *
+ * We first render the straight-line portions of the path (essentially pretending as if all segments
+ * were kLine_Verb) as a triangle fan, using a fragment shader which updates the winding counts 
+ * appropriately. We then render the curved portions of the path using a Loop-Blinn shader which 
+ * calculates which portion of the triangle is covered by the quad (conics and cubics are split down
+ * to quads). Where we diverge from Kokojima is that, instead of rendering into the stencil buffer 
+ * and using built-in MSAA to handle straight-line antialiasing, we use the pixel local storage area
+ * and calculate the MSAA ourselves in the fragment shader. Essentially, we manually evaluate the 
+ * coverage of each pixel four times, storing four winding counts into the pixel local storage area,
+ * and compute the final coverage based on those winding counts.
+ *
+ * Our approach is complicated by the need to perform antialiasing on straight edges as well, 
+ * without relying on hardware MSAA. We instead bloat the triangles to ensure complete coverage, 
+ * pass the original (un-bloated) vertices in to the fragment shader, and then have the fragment 
+ * shader use these vertices to evaluate whether a given sample is located within the triangle or 
+ * not. This gives us MSAA4 edges on triangles which line up nicely with no seams. We similarly face
+ * problems on the back (flat) edges of quads, where we have to ensure that the back edge is 
+ * antialiased in the same way. Similar to the triangle case, we pass in the two (unbloated) 
+ * vertices defining the back edge of the quad and the fragment shader uses these vertex coordinates
+ * to discard samples falling on the other side of the quad's back edge.
+ */
+class GrPLSPathRenderer : public GrPathRenderer {
+public:
+    GrPLSPathRenderer();
+
+    bool onCanDrawPath(const CanDrawPathArgs& args) const override;
+
+protected:
+    bool onDrawPath(const DrawPathArgs& args) override;  // wraps the path in a PLSPathBatch
+};
+
+#endif
diff --git a/src/gpu/batches/GrTInstanceBatch.h b/src/gpu/batches/GrTInstanceBatch.h
index bf37168..5f9a6c5 100644
--- a/src/gpu/batches/GrTInstanceBatch.h
+++ b/src/gpu/batches/GrTInstanceBatch.h
@@ -65,7 +65,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         Impl::InitInvariantOutputCoverage(coverage);
-        overrides->fUsePLSDstRead = false;
     }
 
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
diff --git a/src/gpu/batches/GrTessellatingPathRenderer.cpp b/src/gpu/batches/GrTessellatingPathRenderer.cpp
index 81436ba..a312db6 100644
--- a/src/gpu/batches/GrTessellatingPathRenderer.cpp
+++ b/src/gpu/batches/GrTessellatingPathRenderer.cpp
@@ -91,7 +91,6 @@
                                       GrBatchToXPOverrides* overrides) const override {
         color->setKnownFourComponents(fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
 private:
diff --git a/src/gpu/batches/GrTestBatch.h b/src/gpu/batches/GrTestBatch.h
index 9427504..ee92816 100644
--- a/src/gpu/batches/GrTestBatch.h
+++ b/src/gpu/batches/GrTestBatch.h
@@ -32,7 +32,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(this->geoData(0)->fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
diff --git a/src/gpu/effects/GrCoverageSetOpXP.cpp b/src/gpu/effects/GrCoverageSetOpXP.cpp
index 7761a64..8c98c6c 100644
--- a/src/gpu/effects/GrCoverageSetOpXP.cpp
+++ b/src/gpu/effects/GrCoverageSetOpXP.cpp
@@ -9,6 +9,7 @@
 #include "effects/GrCoverageSetOpXP.h"
 #include "GrCaps.h"
 #include "GrColor.h"
+#include "GrPipeline.h"
 #include "GrProcessor.h"
 #include "GrProcOptInfo.h"
 #include "glsl/GrGLSLBlend.h"
@@ -151,7 +152,6 @@
 public:
     ShaderCSOXferProcessor(const DstTexture* dstTexture,
                            bool hasMixedSamples,
-                           SkXfermode::Mode xfermode,
                            SkRegion::Op regionOp,
                            bool invertCoverage)
         : INHERITED(dstTexture, true, hasMixedSamples)
@@ -323,6 +323,9 @@
         return nullptr;
     }
 
+    if (optimizations.fOverrides.fUsePLSDstRead) {
+        return new ShaderCSOXferProcessor(dst, hasMixedSamples, fRegionOp, fInvertCoverage);
+    }
     return CoverageSetOpXP::Create(fRegionOp, fInvertCoverage);
 }
 
diff --git a/src/gpu/effects/GrCustomXfermode.cpp b/src/gpu/effects/GrCustomXfermode.cpp
index 459ff52..dccec0c 100644
--- a/src/gpu/effects/GrCustomXfermode.cpp
+++ b/src/gpu/effects/GrCustomXfermode.cpp
@@ -54,12 +54,15 @@
 }
 
 static bool can_use_hw_blend_equation(GrBlendEquation equation,
-                                      const GrProcOptInfo& coveragePOI,
+                                      const GrPipelineOptimizations& opt,
                                       const GrCaps& caps) {
     if (!caps.advancedBlendEquationSupport()) {
         return false;
     }
-    if (coveragePOI.isFourChannelOutput()) {
+    if (opt.fOverrides.fUsePLSDstRead) {
+        return false;
+    }
+    if (opt.fCoveragePOI.isFourChannelOutput()) {
         return false; // LCD coverage must be applied after the blend equation.
     }
     if (caps.canUseAdvancedBlendEquation(equation)) {
@@ -334,7 +337,7 @@
                                            bool hasMixedSamples,
                                            const DstTexture*) const override;
 
-    bool willReadDstColor(const GrCaps& caps,
+    bool onWillReadDstColor(const GrCaps& caps,
                           const GrPipelineOptimizations& optimizations,
                           bool hasMixedSamples) const override;
 
@@ -362,17 +365,17 @@
                                                         const GrPipelineOptimizations& opt,
                                                         bool hasMixedSamples,
                                                         const DstTexture* dstTexture) const {
-    if (can_use_hw_blend_equation(fHWBlendEquation, opt.fCoveragePOI, caps)) {
+    if (can_use_hw_blend_equation(fHWBlendEquation, opt, caps)) {
         SkASSERT(!dstTexture || !dstTexture->texture());
         return new CustomXP(fMode, fHWBlendEquation);
     }
     return new CustomXP(dstTexture, hasMixedSamples, fMode);
 }
 
-bool CustomXPFactory::willReadDstColor(const GrCaps& caps,
-                                       const GrPipelineOptimizations& optimizations,
-                                       bool hasMixedSamples) const {
-    return !can_use_hw_blend_equation(fHWBlendEquation, optimizations.fCoveragePOI, caps);
+bool CustomXPFactory::onWillReadDstColor(const GrCaps& caps,
+                                         const GrPipelineOptimizations& optimizations,
+                                         bool hasMixedSamples) const {
+    return !can_use_hw_blend_equation(fHWBlendEquation, optimizations, caps);
 }
 
 void CustomXPFactory::getInvariantBlendedColor(const GrProcOptInfo& colorPOI,
diff --git a/src/gpu/effects/GrDashingEffect.cpp b/src/gpu/effects/GrDashingEffect.cpp
index 1516efd..09dc90a 100644
--- a/src/gpu/effects/GrDashingEffect.cpp
+++ b/src/gpu/effects/GrDashingEffect.cpp
@@ -275,7 +275,6 @@
         // When this is called on a batch, there is only one geometry bundle
         color->setKnownFourComponents(fGeoData[0].fColor);
         coverage->setUnknownSingleComponent();
-        overrides->fUsePLSDstRead = false;
     }
 
     SkSTArray<1, Geometry, true>* geoData() { return &fGeoData; }
diff --git a/src/gpu/effects/GrDisableColorXP.cpp b/src/gpu/effects/GrDisableColorXP.cpp
index 70e40e4..937bea8 100644
--- a/src/gpu/effects/GrDisableColorXP.cpp
+++ b/src/gpu/effects/GrDisableColorXP.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "effects/GrDisableColorXP.h"
+#include "GrPipeline.h"
 #include "GrProcessor.h"
 #include "glsl/GrGLSLFragmentShaderBuilder.h"
 #include "glsl/GrGLSLProgramDataManager.h"
@@ -97,6 +98,7 @@
                                                const GrPipelineOptimizations& optimizations,
                                                bool hasMixedSamples,
                                                const DstTexture* dst) const {
+    SkASSERT(!optimizations.fOverrides.fUsePLSDstRead);
     return DisableColorXP::Create();
 }
 
diff --git a/src/gpu/effects/GrDisableColorXP.h b/src/gpu/effects/GrDisableColorXP.h
index 94aae31..21e6932 100644
--- a/src/gpu/effects/GrDisableColorXP.h
+++ b/src/gpu/effects/GrDisableColorXP.h
@@ -31,9 +31,9 @@
                                            bool hasMixedSamples,
                                            const DstTexture* dstTexture) const override;
 
-    bool willReadDstColor(const GrCaps& caps,
-                          const GrPipelineOptimizations& optimizations,
-                          bool hasMixedSamples) const override {
+    bool onWillReadDstColor(const GrCaps& caps,
+                            const GrPipelineOptimizations& optimizations,
+                            bool hasMixedSamples) const override {
         return false;
     }
 
diff --git a/src/gpu/effects/GrPorterDuffXferProcessor.cpp b/src/gpu/effects/GrPorterDuffXferProcessor.cpp
index c443e1e..d1059c7 100644
--- a/src/gpu/effects/GrPorterDuffXferProcessor.cpp
+++ b/src/gpu/effects/GrPorterDuffXferProcessor.cpp
@@ -739,6 +739,9 @@
                                              const GrPipelineOptimizations& optimizations,
                                              bool hasMixedSamples,
                                              const DstTexture* dstTexture) const {
+    if (optimizations.fOverrides.fUsePLSDstRead) {
+        return new ShaderPDXferProcessor(dstTexture, hasMixedSamples, fXfermode);
+    }
     BlendFormula blendFormula;
     if (optimizations.fCoveragePOI.isFourChannelOutput()) {
         if (SkXfermode::kSrcOver_Mode == fXfermode &&
@@ -795,9 +798,9 @@
     }
 }
 
-bool GrPorterDuffXPFactory::willReadDstColor(const GrCaps& caps,
-                                             const GrPipelineOptimizations& optimizations,
-                                             bool hasMixedSamples) const {
+bool GrPorterDuffXPFactory::onWillReadDstColor(const GrCaps& caps,
+                                               const GrPipelineOptimizations& optimizations,
+                                               bool hasMixedSamples) const {
     if (caps.shaderCaps()->dualSourceBlendingSupport()) {
         return false;
     }
@@ -854,6 +857,9 @@
         const GrPipelineOptimizations& optimizations,
         bool hasMixedSamples,
         const GrXferProcessor::DstTexture* dstTexture) {
+    if (optimizations.fOverrides.fUsePLSDstRead) {
+        return new ShaderPDXferProcessor(dstTexture, hasMixedSamples, SkXfermode::kSrcOver_Mode);
+    }
     if (!optimizations.fCoveragePOI.isFourChannelOutput() &&
         !(optimizations.fCoveragePOI.isSolidWhite() &&
           !hasMixedSamples &&
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index 0bfcc39..d95e10b 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -284,6 +284,17 @@
             ctxInfo.hasExtension("GL_OES_standard_derivatives");
     }
 
+    // Pixel local storage: query the fast storage size when the extension is present. PLS path
+    // rendering is only advertised when framebuffer fetch is supported as well.
+    if (ctxInfo.hasExtension("GL_EXT_shader_pixel_local_storage")) {
+        GR_GL_GetIntegerv(gli, GR_GL_MAX_SHADER_PIXEL_LOCAL_STORAGE_FAST_SIZE,
+                          &glslCaps->fPixelLocalStorageSize);
+        glslCaps->fPLSPathRenderingSupport = glslCaps->fFBFetchSupport;
+    } else {
+        glslCaps->fPixelLocalStorageSize = 0;
+        glslCaps->fPLSPathRenderingSupport = false;
+    }
+
     /**************************************************************************
      * GrCaps fields
      **************************************************************************/
@@ -531,7 +542,6 @@
 
     GrGLSLCaps* glslCaps = static_cast<GrGLSLCaps*>(fShaderCaps.get());
     glslCaps->fGLSLGeneration = ctxInfo.glslGeneration();
-
     if (kGLES_GrGLStandard == standard) {
         if (ctxInfo.hasExtension("GL_EXT_shader_framebuffer_fetch")) {
             glslCaps->fFBFetchNeedsCustomOutput = (version >= GR_GL_VER(3, 0));
diff --git a/src/gpu/gl/GrGLDefines.h b/src/gpu/gl/GrGLDefines.h
index 318fcd4..a759413 100644
--- a/src/gpu/gl/GrGLDefines.h
+++ b/src/gpu/gl/GrGLDefines.h
@@ -161,6 +161,7 @@
 #define GR_GL_VERTEX_PROGRAM_POINT_SIZE      0x8642
 #define GR_GL_LINE_STIPPLE                   0x0B24
 #define GR_GL_FRAMEBUFFER_SRGB               0x8DB9
+#define GR_GL_SHADER_PIXEL_LOCAL_STORAGE     0x8F64
 
 /* ErrorCode */
 #define GR_GL_NO_ERROR                       0
@@ -421,29 +422,30 @@
 #define GR_GL_UNSIGNED_SHORT_5_6_5           0x8363
 
 /* Shaders */
-#define GR_GL_FRAGMENT_SHADER                  0x8B30
-#define GR_GL_VERTEX_SHADER                    0x8B31
-#define GR_GL_GEOMETRY_SHADER                  0x8DD9
-#define GR_GL_MAX_VERTEX_ATTRIBS               0x8869
-#define GR_GL_MAX_VERTEX_UNIFORM_VECTORS       0x8DFB
-#define GR_GL_MAX_VARYING_VECTORS              0x8DFC
-#define GR_GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D
-#define GR_GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS   0x8B4C
-#define GR_GL_MAX_TEXTURE_IMAGE_UNITS          0x8872
-#define GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS     0x8DFD
-#define GR_GL_SHADER_TYPE                      0x8B4F
-#define GR_GL_DELETE_STATUS                    0x8B80
-#define GR_GL_LINK_STATUS                      0x8B82
-#define GR_GL_VALIDATE_STATUS                  0x8B83
-#define GR_GL_ATTACHED_SHADERS                 0x8B85
-#define GR_GL_ACTIVE_UNIFORMS                  0x8B86
-#define GR_GL_ACTIVE_UNIFORM_MAX_LENGTH        0x8B87
-#define GR_GL_ACTIVE_ATTRIBUTES                0x8B89
-#define GR_GL_ACTIVE_ATTRIBUTE_MAX_LENGTH      0x8B8A
-#define GR_GL_SHADING_LANGUAGE_VERSION         0x8B8C
-#define GR_GL_CURRENT_PROGRAM                  0x8B8D
-#define GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS  0x8B49
-#define GR_GL_MAX_VERTEX_UNIFORM_COMPONENTS    0x8B4A
+#define GR_GL_FRAGMENT_SHADER                          0x8B30
+#define GR_GL_VERTEX_SHADER                            0x8B31
+#define GR_GL_GEOMETRY_SHADER                          0x8DD9
+#define GR_GL_MAX_VERTEX_ATTRIBS                       0x8869
+#define GR_GL_MAX_VERTEX_UNIFORM_VECTORS               0x8DFB
+#define GR_GL_MAX_VARYING_VECTORS                      0x8DFC
+#define GR_GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS         0x8B4D
+#define GR_GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS           0x8B4C
+#define GR_GL_MAX_TEXTURE_IMAGE_UNITS                  0x8872
+#define GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS             0x8DFD
+#define GR_GL_SHADER_TYPE                              0x8B4F
+#define GR_GL_DELETE_STATUS                            0x8B80
+#define GR_GL_LINK_STATUS                              0x8B82
+#define GR_GL_VALIDATE_STATUS                          0x8B83
+#define GR_GL_ATTACHED_SHADERS                         0x8B85
+#define GR_GL_ACTIVE_UNIFORMS                          0x8B86
+#define GR_GL_ACTIVE_UNIFORM_MAX_LENGTH                0x8B87
+#define GR_GL_ACTIVE_ATTRIBUTES                        0x8B89
+#define GR_GL_ACTIVE_ATTRIBUTE_MAX_LENGTH              0x8B8A
+#define GR_GL_SHADING_LANGUAGE_VERSION                 0x8B8C
+#define GR_GL_CURRENT_PROGRAM                          0x8B8D
+#define GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS          0x8B49
+#define GR_GL_MAX_VERTEX_UNIFORM_COMPONENTS            0x8B4A
+#define GR_GL_MAX_SHADER_PIXEL_LOCAL_STORAGE_FAST_SIZE 0x8F63
 
 /* StencilFunction */
 #define GR_GL_NEVER                          0x0200
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index acaf8ca..6a61f31 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -12,6 +12,7 @@
 #include "GrGLTextureRenderTarget.h"
 #include "GrGpuResourcePriv.h"
 #include "GrPipeline.h"
+#include "GrPLSGeometryProcessor.h"
 #include "GrRenderTargetPriv.h"
 #include "GrSurfacePriv.h"
 #include "GrTexturePriv.h"
@@ -20,6 +21,7 @@
 #include "builders/GrGLShaderStringBuilder.h"
 #include "glsl/GrGLSL.h"
 #include "glsl/GrGLSLCaps.h"
+#include "glsl/GrGLSLPLSPathRendering.h"
 #include "SkStrokeRec.h"
 #include "SkTemplates.h"
 
@@ -38,7 +40,6 @@
     #define CHECK_ALLOC_ERROR(iface)          GR_GL_NO_ERROR
 #endif
 
-
 ///////////////////////////////////////////////////////////////////////////////
 
 
@@ -224,6 +225,14 @@
     this->createCopyPrograms();
     fWireRectProgram.fProgram = 0;
     fWireRectArrayBuffer = 0;
+    if (this->glCaps().shaderCaps()->plsPathRenderingSupport()) {
+        this->createPLSSetupProgram();
+    }
+    else {
+        memset(&fPLSSetupProgram, 0, sizeof(fPLSSetupProgram));
+    }
+    fHWPLSEnabled = false;
+    fPLSHasBeenUsed = false;
 }
 
 GrGLGpu::~GrGLGpu() {
@@ -265,9 +274,106 @@
         GL_CALL(DeleteBuffers(1, &fWireRectArrayBuffer));
     }
 
+    if (0 != fPLSSetupProgram.fArrayBuffer) {
+        GL_CALL(DeleteBuffers(1, &fPLSSetupProgram.fArrayBuffer));
+    }
+
+    if (0 != fPLSSetupProgram.fProgram) {
+        GL_CALL(DeleteProgram(fPLSSetupProgram.fProgram));
+    }
+
     delete fProgramCache;
 }
 
+void GrGLGpu::createPLSSetupProgram() {
+    // Builds the program (and unit-quad VBO) that seeds pixel local storage for PLS draws.
+    const char* version = this->glCaps().glslCaps()->versionDeclString();
+    GrGLSLShaderVar aVertex("a_vertex", kVec2f_GrSLType, GrShaderVar::kAttribute_TypeModifier);
+    GrGLSLShaderVar uTexCoordXform("u_texCoordXform", kVec4f_GrSLType,
+                                   GrShaderVar::kUniform_TypeModifier);
+    GrGLSLShaderVar uPosXform("u_posXform", kVec4f_GrSLType, GrShaderVar::kUniform_TypeModifier);
+    GrGLSLShaderVar uTexture("u_texture", kSampler2D_GrSLType, GrShaderVar::kUniform_TypeModifier);
+    GrGLSLShaderVar vTexCoord("v_texCoord", kVec2f_GrSLType, GrShaderVar::kVaryingOut_TypeModifier);
+
+    SkString vshaderTxt(version);
+    aVertex.appendDecl(this->glCaps().glslCaps(), &vshaderTxt);
+    vshaderTxt.append(";");
+    uTexCoordXform.appendDecl(this->glCaps().glslCaps(), &vshaderTxt);
+    vshaderTxt.append(";");
+    uPosXform.appendDecl(this->glCaps().glslCaps(), &vshaderTxt);
+    vshaderTxt.append(";");
+    vTexCoord.appendDecl(this->glCaps().glslCaps(), &vshaderTxt);
+    vshaderTxt.append(";");
+    // Vertex shader: gl_Position.xy = a_vertex scaled by u_posXform.xy and offset by u_posXform.zw.
+    vshaderTxt.append(
+        "// PLS Setup Program VS\n"
+        "void main() {"
+        "  gl_Position.xy = a_vertex * u_posXform.xy + u_posXform.zw;"
+        "  gl_Position.zw = vec2(0, 1);"
+        "}"
+    );
+
+    SkString fshaderTxt(version);
+    fshaderTxt.append("#extension ");
+    fshaderTxt.append(this->glCaps().glslCaps()->fbFetchExtensionString());
+    fshaderTxt.append(" : require\n");
+    fshaderTxt.append("#extension GL_EXT_shader_pixel_local_storage : require\n");
+    GrGLSLAppendDefaultFloatPrecisionDeclaration(kDefault_GrSLPrecision,
+                                                 *this->glCaps().glslCaps(),
+                                                 &fshaderTxt);
+    vTexCoord.setTypeModifier(GrShaderVar::kVaryingIn_TypeModifier);
+    vTexCoord.appendDecl(this->glCaps().glslCaps(), &fshaderTxt);
+    fshaderTxt.append(";");
+    uTexture.appendDecl(this->glCaps().glslCaps(), &fshaderTxt);
+    fshaderTxt.append(";");
+    // Fragment shader: stash the fetched framebuffer color in PLS and zero the winding counts.
+    fshaderTxt.appendf(
+        "// PLS Setup Program FS\n"
+        GR_GL_PLS_PATH_DATA_DECL
+        "void main() {\n"
+        "    " GR_GL_PLS_DSTCOLOR_NAME " = gl_LastFragColorARM;\n"
+        "    pls.windings = ivec4(0, 0, 0, 0);\n"
+        "}"
+    );
+    GL_CALL_RET(fPLSSetupProgram.fProgram, CreateProgram());
+    const char* str;
+    GrGLint length;
+
+    str = vshaderTxt.c_str();
+    length = SkToInt(vshaderTxt.size());
+    GrGLuint vshader = GrGLCompileAndAttachShader(*fGLContext, fPLSSetupProgram.fProgram,
+                                                  GR_GL_VERTEX_SHADER, &str, &length, 1, &fStats);
+
+    str = fshaderTxt.c_str();
+    length = SkToInt(fshaderTxt.size());
+    GrGLuint fshader = GrGLCompileAndAttachShader(*fGLContext, fPLSSetupProgram.fProgram,
+                                                  GR_GL_FRAGMENT_SHADER, &str, &length, 1, &fStats);
+
+    // Attribute bindings only take effect at the next link, so bind before LinkProgram.
+    GL_CALL(BindAttribLocation(fPLSSetupProgram.fProgram, 0, "a_vertex"));
+    GL_CALL(LinkProgram(fPLSSetupProgram.fProgram));
+
+    GL_CALL_RET(fPLSSetupProgram.fPosXformUniform, GetUniformLocation(fPLSSetupProgram.fProgram,
+                                                                  "u_posXform"));
+
+    GL_CALL(DeleteShader(vshader));
+    GL_CALL(DeleteShader(fshader));
+
+    GL_CALL(GenBuffers(1, &fPLSSetupProgram.fArrayBuffer));
+    fHWGeometryState.setVertexBufferID(this, fPLSSetupProgram.fArrayBuffer);
+    static const GrGLfloat vdata[] = {
+        0, 0,
+        0, 1,
+        1, 0,
+        1, 1
+    };
+    GL_ALLOC_CALL(this->glInterface(),
+                  BufferData(GR_GL_ARRAY_BUFFER,
+                             (GrGLsizeiptr) sizeof(vdata),
+                             vdata,  // data ptr
+                             GR_GL_STATIC_DRAW));
+}
+
 void GrGLGpu::contextAbandoned() {
     INHERITED::contextAbandoned();
     fProgramCache->abandon();
@@ -2268,8 +2374,28 @@
     return true;
 }
 
-void GrGLGpu::flushRenderTarget(GrGLRenderTarget* target, const SkIRect* bound) {
+void GrGLGpu::performFlushWorkaround() {
+    if (fPLSHasBeenUsed) {
+        /* There is an ARM driver bug where if we use PLS, and then draw a frame which does not
+         * use PLS, it leaves garbage all over the place. As a workaround, once PLS has been used
+         * we use it in a trivial way (stamping a tiny rect far outside the clip volume) on every
+         * call. Because PLS is then always in use, it never becomes safe to stop doing this.
+         */
+        this->disableScissor();
+        // using PLS in the presence of MSAA results in GL_INVALID_OPERATION
+        this->flushHWAAState(nullptr, false);
+        SkASSERT(!fHWPLSEnabled);
+        SkASSERT(fMSAAEnabled != kYes_TriState);
+        GL_CALL(Enable(GR_GL_SHADER_PIXEL_LOCAL_STORAGE));
+        this->stampRectUsingProgram(fPLSSetupProgram.fProgram, 
+                                    SkRect::MakeXYWH(-100.0f, -100.0f, 0.01f, 0.01f), 
+                                    fPLSSetupProgram.fPosXformUniform, 
+                                    fPLSSetupProgram.fArrayBuffer);
+        GL_CALL(Disable(GR_GL_SHADER_PIXEL_LOCAL_STORAGE));
+    }
+}
 
+void GrGLGpu::flushRenderTarget(GrGLRenderTarget* target, const SkIRect* bound) {
     SkASSERT(target);
 
     uint32_t rtID = target->getUniqueID();
@@ -2352,6 +2478,19 @@
         return;
     }
 
+    GrPixelLocalStorageState plsState = args.fPrimitiveProcessor->getPixelLocalStorageState();
+    if (!fHWPLSEnabled && plsState != 
+        GrPixelLocalStorageState::kDisabled_GrPixelLocalStorageState) {
+        GL_CALL(Enable(GR_GL_SHADER_PIXEL_LOCAL_STORAGE));
+        this->setupPixelLocalStorage(args);
+        fHWPLSEnabled = true;
+    }
+    if (plsState == GrPixelLocalStorageState::kFinish_GrPixelLocalStorageState) {
+        GrStencilSettings stencil;
+        stencil.setDisabled();
+        this->flushStencil(stencil);
+    }
+
     size_t indexOffsetInBytes = 0;
     this->setupGeometry(*args.fPrimitiveProcessor, vertices, &indexOffsetInBytes);
 
@@ -2373,6 +2512,16 @@
         GL_CALL(DrawArrays(gPrimitiveType2GLMode[vertices.primitiveType()], 0,
                            vertices.vertexCount()));
     }
+
+    if (fHWPLSEnabled && plsState == GrPixelLocalStorageState::kFinish_GrPixelLocalStorageState) {
+        // PLS draws always involve multiple draws, finishing up with a non-PLS
+        // draw that writes to the color buffer. That draw ends up here; we wait
+        // until after it is complete to actually disable PLS.
+        GL_CALL(Disable(GR_GL_SHADER_PIXEL_LOCAL_STORAGE));
+        fHWPLSEnabled = false;
+        this->disableScissor();
+    }
+
 #if SWAP_PER_DRAW
     glFlush();
     #if defined(SK_BUILD_FOR_MAC)
@@ -2387,6 +2536,57 @@
 #endif
 }
 
+void GrGLGpu::stampRectUsingProgram(GrGLuint program, const SkRect& bounds, GrGLint posXformUniform, 
+                                    GrGLuint arrayBuffer) {  // draws a 4-vertex strip with 'program'
+    GL_CALL(UseProgram(program));
+    this->fHWGeometryState.setVertexArrayID(this, 0);
+    // Feed 'arrayBuffer' to attribute 0 as two floats per vertex; no other attribute is enabled.
+    GrGLAttribArrayState* attribs =
+            this->fHWGeometryState.bindArrayAndBufferToDraw(this, arrayBuffer);
+    attribs->set(this, 0, arrayBuffer, 2, GR_GL_FLOAT, false, 2 * sizeof(GrGLfloat), 0);
+    attribs->disableUnusedArrays(this, 0x1);
+    // The uniform carries the rect as (width, height, left, top).
+    GL_CALL(Uniform4f(posXformUniform, bounds.width(), bounds.height(), bounds.left(), 
+                      bounds.top()));
+    // Draw with reset blend info, color writes on, both faces drawn and the stencil test off.
+    GrXferProcessor::BlendInfo blendInfo;
+    blendInfo.reset();
+    this->flushBlend(blendInfo, GrSwizzle());
+    this->flushColorWrite(true);
+    this->flushDrawFace(GrPipelineBuilder::kBoth_DrawFace);
+    if (!fHWStencilSettings.isDisabled()) {
+        GL_CALL(Disable(GR_GL_STENCIL_TEST));
+    }
+    GL_CALL(DrawArrays(GR_GL_TRIANGLE_STRIP, 0, 4));
+    GL_CALL(UseProgram(fHWProgramID));  // restore the program tracked in fHWProgramID
+    if (!fHWStencilSettings.isDisabled()) {
+        GL_CALL(Enable(GR_GL_STENCIL_TEST));  // undo the temporary disable above
+    }
+}
+
+void GrGLGpu::setupPixelLocalStorage(const DrawArgs& args) {
+    fPLSHasBeenUsed = true;  // from now on performFlushWorkaround() does its PLS stamp
+    const SkRect& bounds = 
+            static_cast<const GrPLSGeometryProcessor*>(args.fPrimitiveProcessor)->getBounds();
+    // setup pixel local storage -- this means capturing and storing the current framebuffer color
+    // and initializing the winding counts to zero
+    GrRenderTarget* rt = args.fPipeline->getRenderTarget();
+    SkScalar width = SkIntToScalar(rt->width());
+    SkScalar height = SkIntToScalar(rt->height());
+    // dst rect edges in NDC (-1 to 1)
+    // having some issues with rounding, just expand the bounds by 1 and trust the scissor to keep
+    // it contained properly
+    GrGLfloat dx0 = 2.0f * (bounds.left() - 1) / width - 1.0f;
+    GrGLfloat dx1 = 2.0f * (bounds.right() + 1) / width - 1.0f;
+    GrGLfloat dy0 = -2.0f * (bounds.top() - 1) / height + 1.0f;
+    GrGLfloat dy1 = -2.0f * (bounds.bottom() + 1) / height + 1.0f;
+    SkRect deviceBounds = SkRect::MakeXYWH(dx0, dy0, dx1 - dx0, dy1 - dy0);
+    // NOTE(review): GR_GL_FETCH_PER_SAMPLE_ARM is enabled here and not disabled in this function.
+    GL_CALL(Enable(GR_GL_FETCH_PER_SAMPLE_ARM));
+    this->stampRectUsingProgram(fPLSSetupProgram.fProgram, deviceBounds, 
+                                fPLSSetupProgram.fPosXformUniform, fPLSSetupProgram.fArrayBuffer);
+}
+
 void GrGLGpu::onResolveRenderTarget(GrRenderTarget* target) {
     GrGLRenderTarget* rt = static_cast<GrGLRenderTarget*>(target);
     if (rt->needsResolve()) {
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 63f1247..6f194dc 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -134,6 +134,8 @@
 
     void drawDebugWireRect(GrRenderTarget*, const SkIRect&, GrColor) override;
 
+    void performFlushWorkaround() override;
+
 private:
     GrGLGpu(GrGLContext* ctx, GrContext* context);
 
@@ -234,6 +236,11 @@
                                       const SkIRect& srcRect,
                                       const SkIPoint& dstPoint);
 
+    void stampRectUsingProgram(GrGLuint program, const SkRect& bounds, GrGLint posXformUniform, 
+                               GrGLuint arrayBuffer);
+
+    void setupPixelLocalStorage(const DrawArgs& args);
+
     static bool BlendCoeffReferencesConstant(GrBlendCoeff coeff);
 
     class ProgramCache : public ::SkNoncopyable {
@@ -355,6 +362,8 @@
     void createWireRectProgram();
     void createUnitRectBuffer();
 
+    void createPLSSetupProgram();
+
     // GL program-related state
     ProgramCache*               fProgramCache;
 
@@ -565,6 +574,15 @@
         }
     }
 
+    struct {
+        GrGLuint    fProgram;  // GL program object built by createPLSSetupProgram()
+        GrGLint     fPosXformUniform;  // location of the "u_posXform" uniform in fProgram
+        GrGLuint    fArrayBuffer;  // vertex buffer holding the unit quad stamped by the program
+    } fPLSSetupProgram;
+
+    bool fHWPLSEnabled;  // true between a PLS draw enabling pixel local storage and its finish
+    bool fPLSHasBeenUsed;  // set by setupPixelLocalStorage(); gates performFlushWorkaround()
+
     typedef GrGpu INHERITED;
     friend class GrGLPathRendering; // For accessing setTextureUnit.
 };
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 007d967..8ef188b 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -89,9 +89,12 @@
 
     this->setFragmentData(primProc, pipeline, textureBindings);
 
-    const GrXferProcessor& xp = pipeline.getXferProcessor();
-    fXferProcessor->setData(fProgramDataManager, xp);
-    append_texture_bindings(xp, textureBindings);
+    if (primProc.getPixelLocalStorageState() != 
+        GrPixelLocalStorageState::kDraw_GrPixelLocalStorageState) {
+        const GrXferProcessor& xp = pipeline.getXferProcessor();
+        fXferProcessor->setData(fProgramDataManager, xp);
+        append_texture_bindings(xp, textureBindings);
+    }
 }
 
 void GrGLProgram::setFragmentData(const GrPrimitiveProcessor& primProc,
diff --git a/src/gpu/gl/GrGLUniformHandler.cpp b/src/gpu/gl/GrGLUniformHandler.cpp
index 1ddb789..5335c19 100644
--- a/src/gpu/gl/GrGLUniformHandler.cpp
+++ b/src/gpu/gl/GrGLUniformHandler.cpp
@@ -26,7 +26,7 @@
     SkDEBUGCODE(static const uint32_t kVisibilityMask = kVertex_Visibility | kFragment_Visibility);
     SkASSERT(0 == (~kVisibilityMask & visibility));
     SkASSERT(0 != visibility);
-    SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsFloatType(type));
+    SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsNumeric(type));
 
     UniformInfo& uni = fUniforms.push_back();
     uni.fVariable.setType(type);
diff --git a/src/gpu/gl/GrGLVertexArray.h b/src/gpu/gl/GrGLVertexArray.h
index ebdb681..0a5dea6 100644
--- a/src/gpu/gl/GrGLVertexArray.h
+++ b/src/gpu/gl/GrGLVertexArray.h
@@ -24,7 +24,6 @@
 };
 
 static inline const GrGLAttribLayout& GrGLAttribTypeToLayout(GrVertexAttribType type) {
-    SkASSERT(type >= 0 && type < kGrVertexAttribTypeCount);
     static const GrGLAttribLayout kLayouts[kGrVertexAttribTypeCount] = {
         {1, GR_GL_FLOAT, false},         // kFloat_GrVertexAttribType
         {2, GR_GL_FLOAT, false},         // kVec2f_GrVertexAttribType
@@ -33,6 +32,7 @@
         {1, GR_GL_UNSIGNED_BYTE, true},  // kUByte_GrVertexAttribType
         {4, GR_GL_UNSIGNED_BYTE, true},  // kVec4ub_GrVertexAttribType
         {2, GR_GL_SHORT, false},         // kVec2s_GrVertexAttribType
+        {4, GR_GL_INT, false},           // kInt_GrVertexAttribType
     };
     GR_STATIC_ASSERT(0 == kFloat_GrVertexAttribType);
     GR_STATIC_ASSERT(1 == kVec2f_GrVertexAttribType);
@@ -41,6 +41,7 @@
     GR_STATIC_ASSERT(4 == kUByte_GrVertexAttribType);
     GR_STATIC_ASSERT(5 == kVec4ub_GrVertexAttribType);
     GR_STATIC_ASSERT(6 == kVec2s_GrVertexAttribType);
+    GR_STATIC_ASSERT(7 == kInt_GrVertexAttribType);
     GR_STATIC_ASSERT(SK_ARRAY_COUNT(kLayouts) == kGrVertexAttribTypeCount);
     return kLayouts[type];
 }
diff --git a/src/gpu/glsl/GrGLSL.h b/src/gpu/glsl/GrGLSL.h
index f2accc5..dc53d7b 100644
--- a/src/gpu/glsl/GrGLSL.h
+++ b/src/gpu/glsl/GrGLSL.h
@@ -100,6 +100,10 @@
             return "samplerExternalOES";
         case kSampler2DRect_GrSLType:
             return "sampler2DRect";
+        case kBool_GrSLType:
+            return "bool";
+        case kInt_GrSLType:
+            return "int";
         default:
             SkFAIL("Unknown shader var type.");
             return ""; // suppress warning
diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
index 90e2043..f97b854 100644
--- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
@@ -7,6 +7,7 @@
 
 #include "GrGLSLFragmentShaderBuilder.h"
 #include "GrRenderTarget.h"
+#include "gl/GrGLGpu.h"
 #include "glsl/GrGLSL.h"
 #include "glsl/GrGLSLCaps.h"
 #include "glsl/GrGLSLProgramBuilder.h"
@@ -87,6 +88,14 @@
             }
             return true;
         }
+        case kPixelLocalStorage_GLSLFeature: {
+            if (fProgramBuilder->glslCaps()->pixelLocalStorageSize() <= 0) {
+                return false;
+            }
+            this->addFeature(1 << kPixelLocalStorage_GLSLFeature,
+                             "GL_EXT_shader_pixel_local_storage");
+            return true;
+        }
         default:
             SkFAIL("Unexpected GLSLFeature requested.");
             return false;
@@ -160,6 +169,11 @@
 const char* GrGLSLFragmentShaderBuilder::dstColor() {
     fHasReadDstColor = true;
 
+    const char* override = fProgramBuilder->primitiveProcessor().getDestColorOverride();
+    if (override != nullptr) {
+        return override;
+    }
+
     const GrGLSLCaps* glslCaps = fProgramBuilder->glslCaps();
     if (glslCaps->fbFetchSupport()) {
         this->addFeature(1 << (GrGLSLFragmentShaderBuilder::kLastGLSLPrivateFeature + 1),
@@ -227,6 +241,13 @@
     return fHasCustomColorOutput ? DeclaredColorOutputName() : "gl_FragColor";
 }
 
+void GrGLSLFragmentBuilder::declAppendf(const char* fmt, ...) {  // printf-style append
+    va_list args;
+    va_start(args, fmt);
+    this->inputs().appendVAList(fmt, args);  // formatted text is appended to inputs()
+    va_end(args);
+}
+
 const char* GrGLSLFragmentShaderBuilder::getSecondaryColorOutputName() const {
     const GrGLSLCaps& caps = *fProgramBuilder->glslCaps();
     return caps.mustDeclareFragmentShaderOutput() ? DeclaredSecondaryColorOutputName()
diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.h b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.h
index 820cf17..e998458 100644
--- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.h
+++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.h
@@ -33,7 +33,8 @@
      */
     enum GLSLFeature {
         kStandardDerivatives_GLSLFeature = 0,
-        kLastGLSLFeature = kStandardDerivatives_GLSLFeature
+        kPixelLocalStorage_GLSLFeature = 1,  // requests GL_EXT_shader_pixel_local_storage
+        kLastGLSLFeature = kPixelLocalStorage_GLSLFeature  // alias of the final enumerator
     };
 
     /**
@@ -66,6 +67,8 @@
     bool hasCustomColorOutput() const { return fHasCustomColorOutput; }
     bool hasSecondaryOutput() const { return fHasSecondaryOutput; }
 
+    void declAppendf(const char* fmt, ...);
+
 protected:
     bool fHasCustomColorOutput;
     bool fHasSecondaryOutput;
diff --git a/src/gpu/glsl/GrGLSLPLSPathRendering.h b/src/gpu/glsl/GrGLSLPLSPathRendering.h
new file mode 100644
index 0000000..9a1625d
--- /dev/null
+++ b/src/gpu/glsl/GrGLSLPLSPathRendering.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrGLSLPLSPathRendering_DEFINED
+#define GrGLSLPLSPathRendering_DEFINED
+
+// GLSL expression naming the dstColor member of the "pls" block declared below.
+#define GR_GL_PLS_DSTCOLOR_NAME  "pls.dstColor"
+
+// GLSL source declaring the __pixel_localEXT block "pls": an rgba8i ivec4 "windings"
+// and an rgba8 vec4 "dstColor". Keep GR_GL_PLS_DSTCOLOR_NAME in sync with these names.
+#define GR_GL_PLS_PATH_DATA_DECL "__pixel_localEXT PLSData {\n"\
+                                 "    layout(rgba8i) ivec4 windings;\n"\
+                                 "    layout(rgba8) vec4 dstColor;\n"\
+                                 "} pls;\n"
+
+#endif
diff --git a/src/gpu/glsl/GrGLSLProgramBuilder.cpp b/src/gpu/glsl/GrGLSLProgramBuilder.cpp
index 6e0e95f..900b6ee 100644
--- a/src/gpu/glsl/GrGLSLProgramBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLProgramBuilder.cpp
@@ -53,9 +53,13 @@
     this->emitAndInstallFragProcs(0, this->pipeline().numColorFragmentProcessors(), inputColor);
     this->emitAndInstallFragProcs(this->pipeline().numColorFragmentProcessors(), numProcs,
                                   inputCoverage);
-    this->emitAndInstallXferProc(this->pipeline().getXferProcessor(), *inputColor, *inputCoverage,
-                                 this->pipeline().ignoresCoverage());
-    this->emitFSOutputSwizzle(this->pipeline().getXferProcessor().hasSecondaryOutput());
+    if (primProc.getPixelLocalStorageState() != 
+        GrPixelLocalStorageState::kDraw_GrPixelLocalStorageState) {
+        this->emitAndInstallXferProc(this->pipeline().getXferProcessor(), *inputColor, 
+                                     *inputCoverage, this->pipeline().ignoresCoverage(),
+                                     primProc.getPixelLocalStorageState());
+        this->emitFSOutputSwizzle(this->pipeline().getXferProcessor().hasSecondaryOutput());
+    }
     return true;
 }
 
@@ -151,7 +155,8 @@
 void GrGLSLProgramBuilder::emitAndInstallXferProc(const GrXferProcessor& xp,
                                                   const GrGLSLExpr4& colorIn,
                                                   const GrGLSLExpr4& coverageIn,
-                                                  bool ignoresCoverage) {
+                                                  bool ignoresCoverage,
+                                                  GrPixelLocalStorageState plsState) {
     // Program builders have a bit of state we need to clear with each effect
     AutoStageAdvance adv(this);
 
@@ -174,6 +179,7 @@
     SkSTArray<4, GrGLSLTextureSampler> samplers(xp.numTextures());
     this->emitSamplers(xp, &samplers);
 
+    bool usePLSDstRead = (plsState == GrPixelLocalStorageState::kFinish_GrPixelLocalStorageState);
     GrGLSLXferProcessor::EmitArgs args(&fFS,
                                        this->uniformHandler(),
                                        this->glslCaps(),
@@ -181,7 +187,8 @@
                                        ignoresCoverage ? nullptr : coverageIn.c_str(),
                                        fFS.getPrimaryColorOutputName(),
                                        fFS.getSecondaryColorOutputName(),
-                                       samplers);
+                                       samplers,
+                                       usePLSDstRead);
     fXferProcessor->emitCode(args);
 
     // We have to check that effects and the code they emit are consistent, ie if an effect
diff --git a/src/gpu/glsl/GrGLSLProgramBuilder.h b/src/gpu/glsl/GrGLSLProgramBuilder.h
index 964d320..2249c3c 100644
--- a/src/gpu/glsl/GrGLSLProgramBuilder.h
+++ b/src/gpu/glsl/GrGLSLProgramBuilder.h
@@ -131,7 +131,8 @@
     void emitAndInstallXferProc(const GrXferProcessor&,
                                 const GrGLSLExpr4& colorIn,
                                 const GrGLSLExpr4& coverageIn,
-                                bool ignoresCoverage);
+                                bool ignoresCoverage,
+                                GrPixelLocalStorageState plsState);
     void emitFSOutputSwizzle(bool hasSecondaryOutput);
 
     void verify(const GrPrimitiveProcessor&);
diff --git a/src/gpu/glsl/GrGLSLShaderVar.h b/src/gpu/glsl/GrGLSLShaderVar.h
index 1aedb91..e26a75c 100644
--- a/src/gpu/glsl/GrGLSLShaderVar.h
+++ b/src/gpu/glsl/GrGLSLShaderVar.h
@@ -73,7 +73,7 @@
              const char* layoutQualifier = nullptr,
              bool useUniformFloatArrays = USE_UNIFORM_FLOAT_ARRAYS) {
         SkASSERT(kVoid_GrSLType != type);
-        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsFloatType(type));
+        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsNumeric(type));
         INHERITED::set(type, name, typeModifier, precision);
         fLayoutQualifier = layoutQualifier;
         fUseUniformFloatArrays = useUniformFloatArrays;
@@ -89,7 +89,7 @@
              const char* layoutQualifier = nullptr,
              bool useUniformFloatArrays = USE_UNIFORM_FLOAT_ARRAYS) {
         SkASSERT(kVoid_GrSLType != type);
-        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsFloatType(type));
+        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsNumeric(type));
         INHERITED::set(type, name, typeModifier, precision);
         fLayoutQualifier = layoutQualifier;
         fUseUniformFloatArrays = useUniformFloatArrays;
@@ -106,7 +106,7 @@
              const char* layoutQualifier = nullptr,
              bool useUniformFloatArrays = USE_UNIFORM_FLOAT_ARRAYS) {
         SkASSERT(kVoid_GrSLType != type);
-        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsFloatType(type));
+        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsNumeric(type));
         INHERITED::set(type, name, typeModifier, precision, count);
         fLayoutQualifier = layoutQualifier;
         fUseUniformFloatArrays = useUniformFloatArrays;
@@ -123,7 +123,7 @@
              const char* layoutQualifier = nullptr,
              bool useUniformFloatArrays = USE_UNIFORM_FLOAT_ARRAYS) {
         SkASSERT(kVoid_GrSLType != type);
-        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsFloatType(type));
+        SkASSERT(kDefault_GrSLPrecision == precision || GrSLTypeIsNumeric(type));
         INHERITED::set(type, name, typeModifier, precision, count);
         fLayoutQualifier = layoutQualifier;
         fUseUniformFloatArrays = useUniformFloatArrays;
@@ -140,16 +140,22 @@
      * Write a declaration of this variable to out.
      */
     void appendDecl(const GrGLSLCaps* glslCaps, SkString* out) const {
-        SkASSERT(kDefault_GrSLPrecision == fPrecision || GrSLTypeIsFloatType(fType));
+        SkASSERT(kDefault_GrSLPrecision == fPrecision || GrSLTypeIsNumeric(fType));
         if (!fLayoutQualifier.isEmpty()) {
             out->appendf("layout(%s) ", fLayoutQualifier.c_str());
         }
         if (this->getTypeModifier() != kNone_TypeModifier) {
-           out->append(TypeModifierString(glslCaps, this->getTypeModifier()));
-           out->append(" ");
+            if (GrSLTypeIsIntType(fType) && (this->getTypeModifier() == kVaryingIn_TypeModifier ||
+                                             this->getTypeModifier() == kVaryingOut_TypeModifier)) {
+                out->append("flat ");
+            }
+            out->append(TypeModifierString(glslCaps, this->getTypeModifier()));
+            out->append(" ");
         }
-        out->append(PrecisionString(glslCaps, fPrecision));
         GrSLType effectiveType = this->getType();
+        if (effectiveType != kBool_GrSLType) {
+            out->append(PrecisionString(glslCaps, fPrecision));
+        }
         if (this->isArray()) {
             if (this->isUnsizedArray()) {
                 out->appendf("%s %s[]",
diff --git a/src/gpu/glsl/GrGLSLXferProcessor.h b/src/gpu/glsl/GrGLSLXferProcessor.h
index 37e684f..478956d 100644
--- a/src/gpu/glsl/GrGLSLXferProcessor.h
+++ b/src/gpu/glsl/GrGLSLXferProcessor.h
@@ -32,7 +32,8 @@
                  const char* inputCoverage,
                  const char* outputPrimary,
                  const char* outputSecondary,
-                 const TextureSamplerArray& samplers)
+                 const TextureSamplerArray& samplers,
+                 const bool usePLSDstRead)
             : fXPFragBuilder(fragBuilder)
             , fUniformHandler(uniformHandler)
             , fGLSLCaps(caps)
@@ -41,7 +42,8 @@
             , fInputCoverage(inputCoverage)
             , fOutputPrimary(outputPrimary)
             , fOutputSecondary(outputSecondary)
-            , fSamplers(samplers) {}
+            , fSamplers(samplers)
+            , fUsePLSDstRead(usePLSDstRead) {}
 
         GrGLSLXPFragmentBuilder* fXPFragBuilder;
         GrGLSLUniformHandler* fUniformHandler;
@@ -52,6 +54,7 @@
         const char* fOutputPrimary;
         const char* fOutputSecondary;
         const TextureSamplerArray& fSamplers;
+        bool fUsePLSDstRead;
     };
     /**
      * This is similar to emitCode() in the base class, except it takes a full shader builder.