Add perspective support to GrTextureOp.

Bug: skia:
Change-Id: Idea4ffae37dc2c2f339af60a2b74ded476091758
Reviewed-on: https://skia-review.googlesource.com/127600
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrQuad.cpp b/src/gpu/GrQuad.cpp
index 724a827..93bd761 100644
--- a/src/gpu/GrQuad.cpp
+++ b/src/gpu/GrQuad.cpp
@@ -43,3 +43,43 @@
         y.store(fY);
     }
 }
+
+GrPerspQuad::GrPerspQuad(const SkRect& rect, const SkMatrix& m) {  // rect's corners mapped by m
+    SkMatrix::TypeMask tm = m.getType();
+    if (tm <= (SkMatrix::kScale_Mask | SkMatrix::kTranslate_Mask)) {  // scale/translate only
+        auto r = Sk4f::Load(&rect);  // presumably lanes are {left, top, right, bottom}
+        const Sk4f t(m.getTranslateX(), m.getTranslateY(), m.getTranslateX(), m.getTranslateY());
+        if (tm <= SkMatrix::kTranslate_Mask) {  // no scale to apply
+            r += t;
+        } else {
+            const Sk4f s(m.getScaleX(), m.getScaleY(), m.getScaleX(), m.getScaleY());
+            r = r * s + t;
+        }
+        SkNx_shuffle<0, 0, 2, 2>(r).store(fX);  // x of each corner: lanes 0, 0, 2, 2 of r
+        SkNx_shuffle<1, 3, 1, 3>(r).store(fY);  // y of each corner: lanes 1, 3, 1, 3 of r
+        fW[0] = fW[1] = fW[2] = fW[3] = 1.f;  // no perspective, so w == 1/w == 1
+        fIW[0] = fIW[1] = fIW[2] = fIW[3] = 1.f;
+    } else {
+        Sk4f rx(rect.fLeft, rect.fLeft, rect.fRight, rect.fRight);  // corners: TL, BL, TR, BR
+        Sk4f ry(rect.fTop, rect.fBottom, rect.fTop, rect.fBottom);
+        Sk4f sx(m.getScaleX());
+        Sk4f kx(m.getSkewX());
+        Sk4f tx(m.getTranslateX());
+        Sk4f ky(m.getSkewY());
+        Sk4f sy(m.getScaleY());
+        Sk4f ty(m.getTranslateY());
+        SkNx_fma(sx, rx, SkNx_fma(kx, ry, tx)).store(fX);  // x' = sx*x + kx*y + tx
+        SkNx_fma(ky, rx, SkNx_fma(sy, ry, ty)).store(fY);  // y' = ky*x + sy*y + ty
+        if (m.hasPerspective()) {
+            Sk4f w0(m.getPerspX());
+            Sk4f w1(m.getPerspY());
+            Sk4f w2(m.get(SkMatrix::kMPersp2));
+            auto w = SkNx_fma(w0, rx, SkNx_fma(w1, ry, w2));  // w' = w0*x + w1*y + w2
+            w.store(fW);  // fX/fY above are left undivided by w
+            w.invert().store(fIW);  // cache 1/w
+        } else {
+            fW[0] = fW[1] = fW[2] = fW[3] = 1.f;
+            fIW[0] = fIW[1] = fIW[2] = fIW[3] = 1.f;
+        }
+    }
+}
diff --git a/src/gpu/GrQuad.h b/src/gpu/GrQuad.h
index 62f025d..74e2356 100644
--- a/src/gpu/GrQuad.h
+++ b/src/gpu/GrQuad.h
@@ -11,6 +11,7 @@
 #include "SkMatrix.h"
 #include "SkNx.h"
 #include "SkPoint.h"
+#include "SkPoint3.h"
 
 /**
  * GrQuad is a collection of 4 points which can be used to represent an arbitrary quadrilateral. The
@@ -53,4 +54,48 @@
     float fY[4];
 };
 
+/**
+ * GrPerspQuad is a collection of 4 homogeneous points (x, y, w) which can be used to represent a
+ * quadrilateral. For each point it also stores 1/w. bounds() multiplies x and y by 1/w, so x and
+ * y are expected to be stored without the divide by w.
+ */
+class GrPerspQuad {
+public:
+    GrPerspQuad() = default;
+
+    /** Builds the quad from a rect and a matrix. */
+    GrPerspQuad(const SkRect&, const SkMatrix&);
+
+    GrPerspQuad& operator=(const GrPerspQuad&) = default;
+
+    /** Returns point i as (x, y, w). */
+    SkPoint3 point(int i) const { return {fX[i], fY[i], fW[i]}; }
+
+    /**
+     * Returns the min/max of x * (1/w) and y * (1/w) over the four points. Declared const since
+     * it only reads the stored values, so it can be called on const quads.
+     */
+    SkRect bounds() const {
+        auto x = this->x4f() * this->iw4f();
+        auto y = this->y4f() * this->iw4f();
+        return {x.min(), y.min(), x.max(), y.max()};
+    }
+
+    float x(int i) const { return fX[i]; }
+    float y(int i) const { return fY[i]; }
+    float w(int i) const { return fW[i]; }
+    float iw(int i) const { return fIW[i]; }
+
+    Sk4f x4f() const { return Sk4f::Load(fX); }
+    Sk4f y4f() const { return Sk4f::Load(fY); }
+    Sk4f w4f() const { return Sk4f::Load(fW); }
+    Sk4f iw4f() const { return Sk4f::Load(fIW); }
+
+private:
+    float fX[4];
+    float fY[4];
+    float fW[4];
+    float fIW[4];  // 1/w
+};
+
 #endif
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index ded5049..86d1dde 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -763,16 +763,15 @@
     return !SkScalarIsInt(x) || !SkScalarIsInt(y);
 }
 
-void GrRenderTargetContext::drawTextureAffine(const GrClip& clip, sk_sp<GrTextureProxy> proxy,
-                                              GrSamplerState::Filter filter, GrColor color,
-                                              const SkRect& srcRect, const SkRect& dstRect, GrAA aa,
-                                              const SkMatrix& viewMatrix,
-                                              sk_sp<GrColorSpaceXform> colorSpaceXform) {
+void GrRenderTargetContext::drawTexture(const GrClip& clip, sk_sp<GrTextureProxy> proxy,
+                                        GrSamplerState::Filter filter, GrColor color,
+                                        const SkRect& srcRect, const SkRect& dstRect, GrAA aa,
+                                        const SkMatrix& viewMatrix,
+                                        sk_sp<GrColorSpaceXform> colorSpaceXform) {
     ASSERT_SINGLE_OWNER
     RETURN_IF_ABANDONED
     SkDEBUGCODE(this->validate();)
-    GR_CREATE_TRACE_MARKER_CONTEXT("GrRenderTargetContext", "drawTextureAffine", fContext);
-    SkASSERT(!viewMatrix.hasPerspective());
+    GR_CREATE_TRACE_MARKER_CONTEXT("GrRenderTargetContext", "drawTexture", fContext);
     if (filter != GrSamplerState::Filter::kNearest && !must_filter(srcRect, dstRect, viewMatrix)) {
         filter = GrSamplerState::Filter::kNearest;
     }
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index 8d677ae..1a68444 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -146,11 +146,11 @@
      * Creates an op that draws a subrectangle of a texture. The passed color is modulated by the
      * texture's color. 'srcRect' specifies the rectangle of the texture to draw. 'dstRect'
      * specifies the rectangle to draw in local coords which will be transformed by 'viewMatrix' to
-     * device space. This asserts that the view matrix does not have perspective.
+     * device space.
      */
-    void drawTextureAffine(const GrClip& clip, sk_sp<GrTextureProxy>, GrSamplerState::Filter,
-                           GrColor, const SkRect& srcRect, const SkRect& dstRect, GrAA aa,
-                           const SkMatrix& viewMatrix, sk_sp<GrColorSpaceXform>);
+    void drawTexture(const GrClip& clip, sk_sp<GrTextureProxy>, GrSamplerState::Filter, GrColor,
+                     const SkRect& srcRect, const SkRect& dstRect, GrAA aa,
+                     const SkMatrix& viewMatrix, sk_sp<GrColorSpaceXform>);
 
     /**
      * Draw a roundrect using a paint.
diff --git a/src/gpu/SkGpuDevice_drawTexture.cpp b/src/gpu/SkGpuDevice_drawTexture.cpp
index 16f8ee8..70c89b5 100644
--- a/src/gpu/SkGpuDevice_drawTexture.cpp
+++ b/src/gpu/SkGpuDevice_drawTexture.cpp
@@ -88,21 +88,20 @@
 
 /**
  * Checks whether the paint, matrix, and constraint are compatible with using
- * GrRenderTargetContext::drawTextureAffine. It is more effecient than the GrTextureProducer
+ * GrRenderTargetContext::drawTexture. It is more efficient than the GrTextureProducer
  * general case.
  */
-static bool can_use_draw_texture_affine(const SkPaint& paint, GrAA aa, const SkMatrix& ctm,
-                                        SkCanvas::SrcRectConstraint constraint) {
+static bool can_use_draw_texture(const SkPaint& paint, GrAA aa, const SkMatrix& ctm,
+                                 SkCanvas::SrcRectConstraint constraint) {
     return (!paint.getColorFilter() && !paint.getShader() && !paint.getMaskFilter() &&
             !paint.getImageFilter() && paint.getFilterQuality() < kMedium_SkFilterQuality &&
-            paint.getBlendMode() == SkBlendMode::kSrcOver && !ctm.hasPerspective() &&
+            paint.getBlendMode() == SkBlendMode::kSrcOver &&
             SkCanvas::kFast_SrcRectConstraint == constraint);
 }
 
-static void draw_texture_affine(const SkPaint& paint, const SkMatrix& ctm, const SkRect* src,
-                                const SkRect* dst, GrAA aa, sk_sp<GrTextureProxy> proxy,
-                                SkColorSpace* colorSpace, const GrClip& clip,
-                                GrRenderTargetContext* rtc) {
+static void draw_texture(const SkPaint& paint, const SkMatrix& ctm, const SkRect* src,
+                         const SkRect* dst, GrAA aa, sk_sp<GrTextureProxy> proxy,
+                         SkColorSpace* colorSpace, const GrClip& clip, GrRenderTargetContext* rtc) {
     SkASSERT(!(SkToBool(src) && !SkToBool(dst)));
     SkRect srcRect = src ? *src : SkRect::MakeWH(proxy->width(), proxy->height());
     SkRect dstRect = dst ? *dst : srcRect;
@@ -130,8 +129,8 @@
     GrColor color = GrPixelConfigIsAlphaOnly(proxy->config())
                             ? SkColorToPremulGrColor(paint.getColor())
                             : SkColorAlphaToGrColor(paint.getColor());
-    rtc->drawTextureAffine(clip, std::move(proxy), filter, color, srcRect, dstRect, aa, ctm,
-                           std::move(csxf));
+    rtc->drawTexture(clip, std::move(proxy), filter, color, srcRect, dstRect, aa, ctm,
+                     std::move(csxf));
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -142,9 +141,9 @@
                                          SkCanvas::SrcRectConstraint constraint,
                                          const SkMatrix& viewMatrix, const SkPaint& paint) {
     GrAA aa = GrAA(paint.isAntiAlias());
-    if (can_use_draw_texture_affine(paint, aa, this->ctm(), constraint)) {
-        draw_texture_affine(paint, viewMatrix, srcRect, dstRect, aa, std::move(proxy), colorSpace,
-                            this->clip(), fRenderTargetContext.get());
+    if (can_use_draw_texture(paint, aa, this->ctm(), constraint)) {
+        draw_texture(paint, viewMatrix, srcRect, dstRect, aa, std::move(proxy), colorSpace,
+                     this->clip(), fRenderTargetContext.get());
         return;
     }
     GrTextureAdjuster adjuster(this->context(), std::move(proxy), alphaType, pinnedUniqueID,
@@ -157,7 +156,7 @@
                                    SkCanvas::SrcRectConstraint constraint,
                                    const SkMatrix& viewMatrix, const SkPaint& paint) {
     GrAA aa = GrAA(paint.isAntiAlias());
-    if (can_use_draw_texture_affine(paint, aa, viewMatrix, constraint)) {
+    if (can_use_draw_texture(paint, aa, viewMatrix, constraint)) {
         sk_sp<SkColorSpace> cs;
         // We've done enough checks above to allow us to pass ClampNearest() and not check for
         // scaling adjustments.
@@ -167,8 +166,8 @@
         if (!proxy) {
             return;
         }
-        draw_texture_affine(paint, viewMatrix, srcRect, dstRect, aa, std::move(proxy), cs.get(),
-                            this->clip(), fRenderTargetContext.get());
+        draw_texture(paint, viewMatrix, srcRect, dstRect, aa, std::move(proxy), cs.get(),
+                     this->clip(), fRenderTargetContext.get());
         return;
     }
     this->drawTextureProducer(maker, srcRect, dstRect, constraint, viewMatrix, paint);
diff --git a/src/gpu/ops/GrNonAAFillRectOp.cpp b/src/gpu/ops/GrNonAAFillRectOp.cpp
index 6386891..269f876 100644
--- a/src/gpu/ops/GrNonAAFillRectOp.cpp
+++ b/src/gpu/ops/GrNonAAFillRectOp.cpp
@@ -72,8 +72,7 @@
                       const GrQuad* localQuad) {
     SkPoint* positions = reinterpret_cast<SkPoint*>(vertices);
 
-    SkPointPriv::SetRectTriStrip(positions, rect.fLeft, rect.fTop, rect.fRight, rect.fBottom,
-            vertexStride);
+    SkPointPriv::SetRectTriStrip(positions, rect, vertexStride);
 
     if (viewMatrix) {
         SkMatrixPriv::MapPointsWithStride(*viewMatrix, positions, vertexStride, kVertsPerRect);
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 91a6927..5b7a30d 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -38,18 +38,24 @@
  */
 class TextureGeometryProcessor : public GrGeometryProcessor {
 public:
-    struct Vertex {
-        SkPoint fPosition;
+    template <typename P> struct Vertex {
+        static constexpr GrAA kAA = GrAA::kNo;
+        static constexpr bool kIsMultiTexture = false;
+        using Position = P;
+        P fPosition;
         SkPoint fTextureCoords;
         GrColor fColor;
     };
-    struct AAVertex : public Vertex {
+    template <typename P> struct AAVertex : Vertex<P> {
+        static constexpr GrAA kAA = GrAA::kYes;
         SkPoint3 fEdges[4];
     };
-    struct MultiTextureVertex : Vertex {
+    template <typename P> struct MultiTextureVertex : Vertex<P> {
+        static constexpr bool kIsMultiTexture = true;
         int fTextureIdx;
     };
-    struct AAMultiTextureVertex : MultiTextureVertex {
+    template <typename P> struct AAMultiTextureVertex : MultiTextureVertex<P> {
+        static constexpr GrAA kAA = GrAA::kYes;
         SkPoint3 fEdges[4];
     };
 
@@ -68,15 +74,16 @@
 
     static sk_sp<GrGeometryProcessor> Make(sk_sp<GrTextureProxy> proxies[], int proxyCnt,
                                            sk_sp<GrColorSpaceXform> csxf, bool coverageAA,
-                                           const GrSamplerState::Filter filters[],
+                                           bool perspective, const GrSamplerState::Filter filters[],
                                            const GrShaderCaps& caps) {
         // We use placement new to avoid always allocating space for kMaxTextures TextureSampler
         // instances.
         int samplerCnt = NumSamplersToUse(proxyCnt, caps);
         size_t size = sizeof(TextureGeometryProcessor) + sizeof(TextureSampler) * (samplerCnt - 1);
         void* mem = GrGeometryProcessor::operator new(size);
-        return sk_sp<TextureGeometryProcessor>(new (mem) TextureGeometryProcessor(
-                proxies, proxyCnt, samplerCnt, std::move(csxf), coverageAA, filters, caps));
+        return sk_sp<TextureGeometryProcessor>(
+                new (mem) TextureGeometryProcessor(proxies, proxyCnt, samplerCnt, std::move(csxf),
+                                                   coverageAA, perspective, filters, caps));
     }
 
     ~TextureGeometryProcessor() override {
@@ -90,7 +97,9 @@
 
     void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override {
         b->add32(GrColorSpaceXform::XformKey(fColorSpaceXform.get()));
-        b->add32(static_cast<uint32_t>(this->usesCoverageEdgeAA()));
+        uint32_t x = this->usesCoverageEdgeAA() ? 0 : 1;
+        x |= kFloat3_GrVertexAttribType == fPositions.fType ? 0 : 2;
+        b->add32(x);
     }
 
     GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps& caps) const override {
@@ -111,9 +120,12 @@
                 const auto& textureGP = args.fGP.cast<TextureGeometryProcessor>();
                 fColorSpaceXformHelper.emitCode(
                         args.fUniformHandler, textureGP.fColorSpaceXform.get());
-                args.fVaryingHandler->setNoPerspective();
+                if (kFloat2_GrVertexAttribType == textureGP.fPositions.fType) {
+                    args.fVaryingHandler->setNoPerspective();
+                }
                 args.fVaryingHandler->emitAttributes(textureGP);
-                this->writeOutputPosition(args.fVertBuilder, gpArgs, textureGP.fPositions.fName);
+                gpArgs->fPositionVar = textureGP.fPositions.asShaderVar();
+
                 this->emitTransforms(args.fVertBuilder,
                                      args.fVaryingHandler,
                                      args.fUniformHandler,
@@ -154,21 +166,38 @@
                 args.fFragBuilder->codeAppend(";");
                 if (textureGP.usesCoverageEdgeAA()) {
                     const char* aaDistName = nullptr;
-                    // When interpolation is innacurate we perform the evaluation of the edge
+                    bool mulByFragCoordW = false;
+                    // When interpolation is inaccurate we perform the evaluation of the edge
                     // equations in the fragment shader rather than interpolating values computed
                     // in the vertex shader.
                     if (!args.fShaderCaps->interpolantsAreInaccurate()) {
                         GrGLSLVarying aaDistVarying(kFloat4_GrSLType,
                                                     GrGLSLVarying::Scope::kVertToFrag);
-                        args.fVaryingHandler->addVarying("aaDists", &aaDistVarying);
-                        args.fVertBuilder->codeAppendf(
-                                R"(%s = float4(dot(aaEdge0.xy, %s.xy) + aaEdge0.z,
-                                               dot(aaEdge1.xy, %s.xy) + aaEdge1.z,
-                                               dot(aaEdge2.xy, %s.xy) + aaEdge2.z,
-                                               dot(aaEdge3.xy, %s.xy) + aaEdge3.z);)",
-                                aaDistVarying.vsOut(), textureGP.fPositions.fName,
-                                textureGP.fPositions.fName, textureGP.fPositions.fName,
-                                textureGP.fPositions.fName);
+                        if (kFloat3_GrVertexAttribType == textureGP.fPositions.fType) {
+                            args.fVaryingHandler->addVarying("aaDists", &aaDistVarying);
+                            // The distance from edge equation e to homogeneous point p=sk_Position
+                            // is e.x*p.x/p.w + e.y*p.y/p.w + e.z. However, we want screen space
+                            // interpolation of this distance. We can do this by multiplying the
+                            // varying in the VS by p.w and then multiplying by sk_FragCoord.w in
+                            // the FS. So we output e.x*p.x + e.y*p.y + e.z * p.w.
+                            args.fVertBuilder->codeAppendf(
+                                    R"(%s = float4(dot(aaEdge0, %s), dot(aaEdge1, %s),
+                                                   dot(aaEdge2, %s), dot(aaEdge3, %s));)",
+                                    aaDistVarying.vsOut(), textureGP.fPositions.fName,
+                                    textureGP.fPositions.fName, textureGP.fPositions.fName,
+                                    textureGP.fPositions.fName);
+                            mulByFragCoordW = true;
+                        } else {
+                            args.fVaryingHandler->addVarying("aaDists", &aaDistVarying);
+                            args.fVertBuilder->codeAppendf(
+                                    R"(%s = float4(dot(aaEdge0.xy, %s.xy) + aaEdge0.z,
+                                                   dot(aaEdge1.xy, %s.xy) + aaEdge1.z,
+                                                   dot(aaEdge2.xy, %s.xy) + aaEdge2.z,
+                                                   dot(aaEdge3.xy, %s.xy) + aaEdge3.z);)",
+                                    aaDistVarying.vsOut(), textureGP.fPositions.fName,
+                                    textureGP.fPositions.fName, textureGP.fPositions.fName,
+                                    textureGP.fPositions.fName);
+                        }
                         aaDistName = aaDistVarying.fsIn();
                     } else {
                         GrGLSLVarying aaEdgeVarying[4]{
@@ -199,6 +228,9 @@
                     args.fFragBuilder->codeAppendf(
                             "float mindist = min(min(%s.x, %s.y), min(%s.z, %s.w));",
                             aaDistName, aaDistName, aaDistName, aaDistName);
+                    if (mulByFragCoordW) {
+                        args.fFragBuilder->codeAppend("mindist *= sk_FragCoord.w;");
+                    }
                     args.fFragBuilder->codeAppendf("%s = float4(clamp(mindist, 0, 1));",
                                                    args.fOutputCoverage);
                 } else {
@@ -229,7 +261,7 @@
     }
 
     TextureGeometryProcessor(sk_sp<GrTextureProxy> proxies[], int proxyCnt, int samplerCnt,
-                             sk_sp<GrColorSpaceXform> csxf, bool coverageAA,
+                             sk_sp<GrColorSpaceXform> csxf, bool coverageAA, bool perspective,
                              const GrSamplerState::Filter filters[], const GrShaderCaps& caps)
             : INHERITED(kTextureGeometryProcessor_ClassID), fColorSpaceXform(std::move(csxf)) {
         SkASSERT(proxyCnt > 0 && samplerCnt >= proxyCnt);
@@ -242,7 +274,11 @@
             this->addTextureSampler(&fSamplers[i]);
         }
 
-        fPositions = this->addVertexAttrib("position", kFloat2_GrVertexAttribType);
+        if (perspective) {
+            fPositions = this->addVertexAttrib("position", kFloat3_GrVertexAttribType);
+        } else {
+            fPositions = this->addVertexAttrib("position", kFloat2_GrVertexAttribType);
+        }
         fTextureCoords = this->addVertexAttrib("textureCoords", kFloat2_GrVertexAttribType);
         fColors = this->addVertexAttrib("color", kUByte4_norm_GrVertexAttribType);
 
@@ -322,27 +358,37 @@
 
 namespace {
 // This is a class soley so it can be partially specialized (functions cannot be).
-template<GrAA, typename Vertex> class VertexAAHandler;
+template <typename Vertex, GrAA AA = Vertex::kAA, typename Position = typename Vertex::Position>
+class VertexAAHandler;
 
-template<typename Vertex> class VertexAAHandler<GrAA::kNo, Vertex> {
+template<typename Vertex> class VertexAAHandler<Vertex, GrAA::kNo, SkPoint> {
 public:
-    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrQuad& quad,
+    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrPerspQuad& quad,
                                             const SkRect& texRect) {
-        vertices[0].fPosition = quad.point(0);
-        vertices[0].fTextureCoords = {texRect.fLeft, texRect.fTop};
-        vertices[1].fPosition = quad.point(1);
-        vertices[1].fTextureCoords = {texRect.fLeft, texRect.fBottom};
-        vertices[2].fPosition = quad.point(2);
-        vertices[2].fTextureCoords = {texRect.fRight, texRect.fTop};
-        vertices[3].fPosition = quad.point(3);
-        vertices[3].fTextureCoords = {texRect.fRight, texRect.fBottom};
+        SkASSERT((quad.w4f() == Sk4f(1.f)).allTrue());
+        SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(Vertex));
+        for (int i = 0; i < 4; ++i) {
+            vertices[i].fPosition = {quad.x(i), quad.y(i)};
+        }
     }
 };
 
-template<typename Vertex> class VertexAAHandler<GrAA::kYes, Vertex> {
+template<typename Vertex> class VertexAAHandler<Vertex, GrAA::kNo, SkPoint3> {
 public:
-    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrQuad& quad,
+    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrPerspQuad& quad,
                                             const SkRect& texRect) {
+        SkPointPriv::SetRectTriStrip(&vertices[0].fTextureCoords, texRect, sizeof(Vertex));
+        for (int i = 0; i < 4; ++i) {  // no AA outset: copy each (x, y, w) point as-is
+            vertices[i].fPosition = quad.point(i);
+        }
+    }
+};
+
+template<typename Vertex> class VertexAAHandler<Vertex, GrAA::kYes, SkPoint> {
+public:
+    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrPerspQuad& quad,
+                                            const SkRect& texRect) {
+        SkASSERT((quad.w4f() == Sk4f(1.f)).allTrue());
         auto x = quad.x4f();
         auto y = quad.y4f();
         Sk4f a, b, c;
@@ -359,7 +405,7 @@
     }
 
 private:
-    static void AssignTexCoords(Vertex* vertices, const GrQuad& quad, const SkRect& tex) {
+    static void AssignTexCoords(Vertex* vertices, const GrPerspQuad& quad, const SkRect& tex) {
         SkMatrix q = SkMatrix::MakeAll(quad.x(0), quad.x(1), quad.x(2),
                                        quad.y(0), quad.y(1), quad.y(2),
                                              1.f,       1.f,       1.f);
@@ -377,14 +423,73 @@
     }
 };
 
-template <typename Vertex, bool IsMultiTex> struct TexIdAssigner;
+template<typename Vertex> class VertexAAHandler<Vertex, GrAA::kYes, SkPoint3> {
+public:
+    static void AssignPositionsAndTexCoords(Vertex* vertices, const GrPerspQuad& quad,
+                                            const SkRect& texRect) {
+        auto x = quad.x4f();
+        auto y = quad.y4f();
+        auto iw = quad.iw4f();
+        x *= iw;  // x/w
+        y *= iw;  // y/w
+
+        // Get an equation for w from device space coords.
+        SkMatrix P;
+        P.setAll(x[0], y[0], 1, x[1], y[1], 1, x[2], y[2], 1);  // row i is (x_i, y_i, 1)
+        SkAssertResult(P.invert(&P));  // NOTE(review): singular if points 0-2 are collinear
+        SkPoint3 weq{quad.w(0), quad.w(1), quad.w(2)};
+        P.mapHomogeneousPoints(&weq, &weq, 1);  // presumably weq . (x_i, y_i, 1) == w_i now
+
+        Sk4f a, b, c;
+        compute_quad_edges_and_outset_vertices(&x, &y, &a, &b, &c);  // presumably moves x, y
+
+        // Compute new w values for the output vertices.
+        auto w = Sk4f(weq.fX) * x + Sk4f(weq.fY) * y + Sk4f(weq.fZ);
+        x *= w;  // back to homogeneous x
+        y *= w;  // back to homogeneous y
+
+        for (int i = 0; i < 4; ++i) {
+            vertices[i].fPosition = {x[i], y[i], w[i]};
+            for (int j = 0; j < 4; ++j) {
+                vertices[i].fEdges[j] = {a[j], b[j], c[j]};  // each vertex gets all 4 edges
+            }
+        }
+
+        AssignTexCoords(vertices, quad, texRect);
+    }
+
+private:
+    static void AssignTexCoords(Vertex* vertices, const GrPerspQuad& quad, const SkRect& tex) {
+        SkMatrix q = SkMatrix::MakeAll(quad.x(0), quad.x(1), quad.x(2),
+                                       quad.y(0), quad.y(1), quad.y(2),
+                                       quad.w(0), quad.w(1), quad.w(2));
+        SkMatrix qinv;
+        if (!q.invert(&qinv)) {  // tex coords are left unwritten in this case
+            return;
+        }
+        SkMatrix t = SkMatrix::MakeAll(tex.fLeft, tex.fLeft,   tex.fRight,
+                                       tex.fTop,  tex.fBottom, tex.fTop,
+                                       1.f,       1.f,         1.f);
+        SkMatrix map;
+        map.setConcat(t, qinv);
+        SkPoint3 tempTexCoords[4];
+        SkMatrixPriv::MapHomogeneousPointsWithStride(map, tempTexCoords, sizeof(SkPoint3),
+                                                     &vertices[0].fPosition, sizeof(Vertex), 4);
+        for (int i = 0; i < 4; ++i) {
+            auto invW = 1.f / tempTexCoords[i].fZ;  // divide through by the mapped third coord
+            vertices[i].fTextureCoords.fX = tempTexCoords[i].fX * invW;
+            vertices[i].fTextureCoords.fY = tempTexCoords[i].fY * invW;
+        }
+    }
+};
+
+template <typename Vertex, bool MT = Vertex::kIsMultiTexture> struct TexIdAssigner;
 
 template <typename Vertex> struct TexIdAssigner<Vertex, true> {
     static void Assign(Vertex* vertices, int textureIdx) {
-        vertices[0].fTextureIdx = textureIdx;
-        vertices[1].fTextureIdx = textureIdx;
-        vertices[2].fTextureIdx = textureIdx;
-        vertices[3].fTextureIdx = textureIdx;
+        for (int i = 0; i < 4; ++i) {
+            vertices[i].fTextureIdx = textureIdx;
+        }
     }
 };
 
@@ -393,8 +498,8 @@
 };
 }  // anonymous namespace
 
-template <typename Vertex, bool IsMultiTex, GrAA AA>
-static void tessellate_quad(const GrQuad& devQuad, const SkRect& srcRect, GrColor color,
+template <typename Vertex>
+static void tessellate_quad(const GrPerspQuad& devQuad, const SkRect& srcRect, GrColor color,
                             GrSurfaceOrigin origin, Vertex* vertices, SkScalar iw, SkScalar ih,
                             int textureIdx) {
     SkRect texRect = {
@@ -407,12 +512,12 @@
         texRect.fTop = 1.f - texRect.fTop;
         texRect.fBottom = 1.f - texRect.fBottom;
     }
-    VertexAAHandler<AA, Vertex>::AssignPositionsAndTexCoords(vertices, devQuad, texRect);
+    VertexAAHandler<Vertex>::AssignPositionsAndTexCoords(vertices, devQuad, texRect);
     vertices[0].fColor = color;
     vertices[1].fColor = color;
     vertices[2].fColor = color;
     vertices[3].fColor = color;
-    TexIdAssigner<Vertex, IsMultiTex>::Assign(vertices, textureIdx);
+    TexIdAssigner<Vertex>::Assign(vertices, textureIdx);
 }
 /**
  * Op that implements GrTextureOp::Make. It draws textured quads. Each quad can modulate against a
@@ -524,8 +629,10 @@
         draw.fSrcRect = srcRect;
         draw.fTextureIdx = 0;
         draw.fColor = color;
-        draw.fQuad = GrQuad(dstRect, viewMatrix);
-        SkRect bounds = draw.fQuad.bounds();
+        fPerspective = viewMatrix.hasPerspective();
+        SkRect bounds;
+        draw.fQuad = GrPerspQuad(dstRect, viewMatrix);
+        bounds = draw.fQuad.bounds();
         this->setBounds(bounds, HasAABloat::kNo, IsZeroArea::kNo);
 
         fMaxApproxDstPixelArea = RectSizeAsSizeT(bounds);
@@ -543,9 +650,9 @@
         }
 
         bool coverageAA = GrAAType::kCoverage == this->aaType();
-        sk_sp<GrGeometryProcessor> gp =
-                TextureGeometryProcessor::Make(proxiesSPs, fProxyCnt, std::move(fColorSpaceXform),
-                                               coverageAA, filters, *target->caps().shaderCaps());
+        sk_sp<GrGeometryProcessor> gp = TextureGeometryProcessor::Make(
+                proxiesSPs, fProxyCnt, std::move(fColorSpaceXform), coverageAA, fPerspective,
+                filters, *target->caps().shaderCaps());
         GrPipeline::InitArgs args;
         args.fProxy = target->proxy();
         args.fCaps = &target->caps();
@@ -568,59 +675,52 @@
             SkDebugf("Could not allocate vertices\n");
             return;
         }
+
+// TODO: replace this macro with a generic lambda once C++14 is available.
+#define TESS_VERTS(Vertex)                                                                     \
+    SkASSERT(gp->getVertexStride() == sizeof(Vertex));                                         \
+    auto vertices = static_cast<Vertex*>(vdata);                                               \
+    for (const auto& draw : fDraws) {                                                          \
+        auto origin = proxies[draw.fTextureIdx]->origin();                                     \
+        tessellate_quad<Vertex>(draw.fQuad, draw.fSrcRect, draw.fColor, origin, vertices,      \
+                                iw[draw.fTextureIdx], ih[draw.fTextureIdx], draw.fTextureIdx); \
+        vertices += 4;                                                                         \
+    }
+
+        float iw[kMaxTextures];
+        float ih[kMaxTextures];
+        for (int t = 0; t < fProxyCnt; ++t) {
+            const auto* texture = proxies[t]->priv().peekTexture();
+            iw[t] = 1.f / texture->width();
+            ih[t] = 1.f / texture->height();
+        }
+
         if (1 == fProxyCnt) {
-            GrSurfaceOrigin origin = proxies[0]->origin();
-            GrTexture* texture = proxies[0]->priv().peekTexture();
-            float iw = 1.f / texture->width();
-            float ih = 1.f / texture->height();
             if (coverageAA) {
-                SkASSERT(gp->getVertexStride() == sizeof(TextureGeometryProcessor::AAVertex));
-                auto vertices = static_cast<TextureGeometryProcessor::AAVertex*>(vdata);
-                for (int i = 0; i < fDraws.count(); ++i) {
-                    tessellate_quad<TextureGeometryProcessor::AAVertex, false, GrAA::kYes>(
-                            fDraws[i].fQuad, fDraws[i].fSrcRect, fDraws[i].fColor, origin,
-                            vertices + 4 * i, iw, ih, 0);
+                if (fPerspective) {
+                    TESS_VERTS(TextureGeometryProcessor::AAVertex<SkPoint3>)
+                } else {
+                    TESS_VERTS(TextureGeometryProcessor::AAVertex<SkPoint>)
                 }
             } else {
-                SkASSERT(gp->getVertexStride() == sizeof(TextureGeometryProcessor::Vertex));
-                auto vertices = static_cast<TextureGeometryProcessor::Vertex*>(vdata);
-                for (int i = 0; i < fDraws.count(); ++i) {
-                    tessellate_quad<TextureGeometryProcessor::Vertex, false, GrAA::kNo>(
-                            fDraws[i].fQuad, fDraws[i].fSrcRect, fDraws[i].fColor, origin,
-                            vertices + 4 * i, iw, ih, 0);
+                if (fPerspective) {
+                    TESS_VERTS(TextureGeometryProcessor::Vertex<SkPoint3>)
+                } else {
+                    TESS_VERTS(TextureGeometryProcessor::Vertex<SkPoint>)
                 }
             }
         } else {
-            GrTexture* textures[kMaxTextures];
-            float iw[kMaxTextures];
-            float ih[kMaxTextures];
-            for (int t = 0; t < fProxyCnt; ++t) {
-                textures[t] = proxies[t]->priv().peekTexture();
-                iw[t] = 1.f / textures[t]->width();
-                ih[t] = 1.f / textures[t]->height();
-            }
             if (coverageAA) {
-                SkASSERT(gp->getVertexStride() ==
-                         sizeof(TextureGeometryProcessor::AAMultiTextureVertex));
-                auto vertices = static_cast<TextureGeometryProcessor::AAMultiTextureVertex*>(vdata);
-                for (int i = 0; i < fDraws.count(); ++i) {
-                    auto tidx = fDraws[i].fTextureIdx;
-                    GrSurfaceOrigin origin = proxies[tidx]->origin();
-                    tessellate_quad<TextureGeometryProcessor::AAMultiTextureVertex, true,
-                                    GrAA::kYes>(fDraws[i].fQuad, fDraws[i].fSrcRect,
-                                                fDraws[i].fColor, origin, vertices + 4 * i,
-                                                iw[tidx], ih[tidx], tidx);
+                if (fPerspective) {
+                    TESS_VERTS(TextureGeometryProcessor::AAMultiTextureVertex<SkPoint3>)
+                } else {
+                    TESS_VERTS(TextureGeometryProcessor::AAMultiTextureVertex<SkPoint>)
                 }
             } else {
-                SkASSERT(gp->getVertexStride() ==
-                         sizeof(TextureGeometryProcessor::MultiTextureVertex));
-                auto vertices = static_cast<TextureGeometryProcessor::MultiTextureVertex*>(vdata);
-                for (int i = 0; i < fDraws.count(); ++i) {
-                    auto tidx = fDraws[i].fTextureIdx;
-                    GrSurfaceOrigin origin = proxies[tidx]->origin();
-                    tessellate_quad<TextureGeometryProcessor::MultiTextureVertex, true, GrAA::kNo>(
-                            fDraws[i].fQuad, fDraws[i].fSrcRect, fDraws[i].fColor, origin,
-                            vertices + 4 * i, iw[tidx], ih[tidx], tidx);
+                if (fPerspective) {
+                    TESS_VERTS(TextureGeometryProcessor::MultiTextureVertex<SkPoint3>)
+                } else {
+                    TESS_VERTS(TextureGeometryProcessor::MultiTextureVertex<SkPoint>)
                 }
             }
         }
@@ -704,6 +804,7 @@
         }
         this->joinBounds(*that);
         fMaxApproxDstPixelArea = SkTMax(that->fMaxApproxDstPixelArea, fMaxApproxDstPixelArea);
+        fPerspective |= that->fPerspective;
         return true;
     }
 
@@ -779,7 +880,7 @@
     struct Draw {
         SkRect fSrcRect;
         int fTextureIdx;
-        GrQuad fQuad;
+        GrPerspQuad fQuad;
         GrColor fColor;
     };
     SkSTArray<1, Draw, true> fDraws;
@@ -795,6 +896,7 @@
     GrSamplerState::Filter fFilter0;
     uint8_t fProxyCnt;
     unsigned fAAType : 2;
+    unsigned fPerspective : 1;
     // Used to track whether fProxy is ref'ed or has a pending IO after finalize() is called.
     unsigned fFinalized : 1;
     unsigned fAllowSRGBInputs : 1;
@@ -813,7 +915,6 @@
                                GrColor color, const SkRect& srcRect, const SkRect& dstRect,
                                GrAAType aaType, const SkMatrix& viewMatrix,
                                sk_sp<GrColorSpaceXform> csxf, bool allowSRGBInputs) {
-    SkASSERT(!viewMatrix.hasPerspective());
     return TextureOp::Make(std::move(proxy), filter, color, srcRect, dstRect, aaType, viewMatrix,
                            std::move(csxf), allowSRGBInputs);
 }