Support per-entry transforms in image-set API

Bug: skia:
Change-Id: I508ec8cb1df1c407853b401c73c66a575fb9c661
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/196642
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/gm/compositor_quads.cpp b/gm/compositor_quads.cpp
index 3a9db78..e111774 100644
--- a/gm/compositor_quads.cpp
+++ b/gm/compositor_quads.cpp
@@ -186,12 +186,14 @@
     // region within the tile (reset for each tile).
     //
     // The edgeAA order matches that of clip, so it refers to top, right, bottom, left.
-    virtual void drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4],
+    // Returns the number of draw calls issued for this tile.
+    virtual int drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4],
                           const bool edgeAA[4], int tileID, int quadID) = 0;
 
     virtual void drawBanner(SkCanvas* canvas) = 0;
 
-    virtual void drawTiles(SkCanvas* canvas, GrContext* context, GrRenderTargetContext* rtc) {
+    // Returns the total number of draw calls issued across all tiles.
+    virtual int drawTiles(SkCanvas* canvas, GrContext* context, GrRenderTargetContext* rtc) {
         // TODO (michaelludwig) - once the quad APIs are in SkCanvas, drop these
         // cached fields, which drawTile() needs
         fContext = context;
@@ -210,7 +212,7 @@
             // to test locally (and must add ClipTileRenderer as a friend in SkPaintFilterCanvas)
             // SkPaintFilterCanvas* filteredCanvas = static_cast<SkPaintFilterCanvas*>(canvas);
             // fDevice = static_cast<SkGpuDevice*>(filteredCanvas->proxy()->getDevice());
-            return;
+            return 0;
         }
 
         // All three lines in a list
@@ -221,6 +223,7 @@
 
         bool edgeAA[4];
         int tileID = 0;
+        int drawCount = 0;
         for (int i = 0; i < kRowCount; ++i) {
             for (int j = 0; j < kColCount; ++j) {
                 // The unclipped tile geometry
@@ -235,10 +238,13 @@
                 // Now clip against the 3 lines formed by kClipPx and split into general purpose
                 // quads as needed.
                 int quadCount = 0;
-                this->clipTile(canvas, tileID, tile, nullptr, edgeAA, lines, 3, &quadCount);
+                drawCount += this->clipTile(canvas, tileID, tile, nullptr, edgeAA, lines, 3,
+                                            &quadCount);
                 tileID++;
             }
         }
+
+        return drawCount;
     }
 
 protected:
@@ -257,14 +263,14 @@
     // Recursively splits the quadrilateral against the segments stored in 'lines', which must be
     // 2 * lineCount long. Increments 'quadCount' for each split quadrilateral, and invokes the
     // drawTile at leaves.
-    void clipTile(SkCanvas* canvas, int tileID, const SkRect& baseRect, const SkPoint quad[4],
+    int clipTile(SkCanvas* canvas, int tileID, const SkRect& baseRect, const SkPoint quad[4],
                   const bool edgeAA[4], const SkPoint lines[], int lineCount, int* quadCount) {
         if (lineCount == 0) {
             // No lines, so end recursion by drawing the tile. If the tile was never split then
             // 'quad' remains null so that drawTile() can differentiate how it should draw.
-            this->drawTile(canvas, baseRect, quad, edgeAA, tileID, *quadCount);
+            int draws = this->drawTile(canvas, baseRect, quad, edgeAA, tileID, *quadCount);
             *quadCount = *quadCount + 1;
-            return;
+            return draws;
         }
 
         static constexpr int kTL = 0; // Top-left point index in points array
@@ -376,12 +382,13 @@
                     // been found with edge 0 or edge 2 for the other end
                 default:
                     SkASSERT(false);
-                    return;
+                    return 0;
             }
         }
 
         SkPoint sub[4];
         bool subAA[4];
+        int draws = 0;
         for (int i = 0; i < subtiles.count(); ++i) {
             // Fill in the quad points and update edge AA rules for new interior edges
             for (int j = 0; j < 4; ++j) {
@@ -403,12 +410,15 @@
             }
 
             // Split the sub quad with the next line
-            this->clipTile(canvas, tileID, baseRect, sub, subAA, lines + 2, lineCount - 1,
-                           quadCount);
+            draws += this->clipTile(canvas, tileID, baseRect, sub, subAA, lines + 2, lineCount - 1,
+                                    quadCount);
         }
+        return draws;
     }
 };
 
+static constexpr int kMatrixCount = 5;
+
 class CompositorGM : public skiagm::GpuGM {
 public:
     CompositorGM(const char* name, sk_sp<ClipTileRenderer> renderer)
@@ -447,15 +457,10 @@
         static constexpr SkScalar kBannerWidth = 120.f;
         static constexpr SkScalar kOffset = 15.f;
 
-        // Print a row header
-        canvas->save();
-        canvas->translate(kOffset, kGap + 0.5f * kRowCount * kTileHeight);
-        for (int j = 0; j < fRenderers.count(); ++j) {
-            fRenderers[j]->drawBanner(canvas);
-            canvas->translate(0.f, kGap + kRowCount * kTileHeight);
-        }
-        canvas->restore();
+        SkTArray<int> drawCounts(fRenderers.count());
+        drawCounts.push_back_n(fRenderers.count(), 0);
 
+        canvas->save();
         canvas->translate(kOffset + kBannerWidth, kOffset);
         for (int i = 0; i < fMatrices.count(); ++i) {
             canvas->save();
@@ -468,7 +473,7 @@
                 draw_clipping_boundaries(canvas, fMatrices[i]);
 
                 canvas->concat(fMatrices[i]);
-                fRenderers[j]->drawTiles(canvas, ctx, rtc);
+                drawCounts[j] += fRenderers[j]->drawTiles(canvas, ctx, rtc);
 
                 canvas->restore();
                 // And advance to the next row
@@ -479,11 +484,21 @@
             // And advance to the next column
             canvas->translate(kGap + kColCount * kTileWidth, 0.f);
         }
+        canvas->restore();
+
+        // Print a row header, with total draw counts
+        canvas->save();
+        canvas->translate(kOffset, kGap + 0.5f * kRowCount * kTileHeight);
+        for (int j = 0; j < fRenderers.count(); ++j) {
+            fRenderers[j]->drawBanner(canvas);
+            canvas->translate(0.f, 15.f);
+            draw_text(canvas, SkStringPrintf("Draws = %d", drawCounts[j]).c_str());
+            canvas->translate(0.f, kGap + kRowCount * kTileHeight);
+        }
+        canvas->restore();
     }
 
 private:
-    static constexpr int kMatrixCount = 5;
-
     SkTArray<sk_sp<ClipTileRenderer>> fRenderers;
     SkTArray<SkMatrix> fMatrices;
     SkTArray<SkString> fMatrixNames;
@@ -551,7 +566,7 @@
         return sk_sp<ClipTileRenderer>(new DebugTileRenderer(SkCanvas::kNone_QuadAAFlags, true));
     }
 
-    void drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
+    int drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
                   int tileID, int quadID) override {
         // Colorize the tile based on its grid position and quad ID
         int i = tileID / kColCount;
@@ -567,10 +582,10 @@
         SkCanvas::QuadAAFlags aaFlags = fEnableAAOverride ? fAAOverride : this->maskToFlags(edgeAA);
         fDevice->tmp_drawEdgeAAQuad(
                 rect, clip, clip ? 4 : 0, aaFlags, c.toSkColor(), SkBlendMode::kSrcOver);
+        return 1;
     }
 
     void drawBanner(SkCanvas* canvas) override {
-        canvas->save();
         draw_text(canvas, "Edge AA");
         canvas->translate(0.f, 15.f);
 
@@ -587,9 +602,7 @@
         } else {
             config.appendf(kFormat, "yes", "no");
         }
-        canvas->translate(0.f, 6.f);
         draw_text(canvas, config.c_str());
-        canvas->restore();
     }
 
 private:
@@ -611,10 +624,11 @@
         return sk_sp<ClipTileRenderer>(new SolidColorRenderer(color));
     }
 
-    void drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
+    int drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
                   int tileID, int quadID) override {
         fDevice->tmp_drawEdgeAAQuad(rect, clip, clip ? 4 : 0, this->maskToFlags(edgeAA),
                                     fColor.toSkColor(), SkBlendMode::kSrcOver);
+        return 1;
     }
 
     void drawBanner(SkCanvas* canvas) override {
@@ -635,41 +649,43 @@
 public:
 
     static sk_sp<ClipTileRenderer> MakeUnbatched(sk_sp<SkImage> image) {
-        return Make("Texture", "", std::move(image), nullptr, nullptr, nullptr, nullptr, 1.f, true);
+        return Make("Texture", "", std::move(image), nullptr, nullptr, nullptr, nullptr,
+                    1.f, true, 0);
     }
 
-    static sk_sp<ClipTileRenderer> MakeBatched(sk_sp<SkImage> image) {
-        return Make("Texture Set", "", std::move(image), nullptr, nullptr, nullptr, nullptr, 1.f,
-                    false);
+    static sk_sp<ClipTileRenderer> MakeBatched(sk_sp<SkImage> image, int transformCount) {
+        const char* subtitle = transformCount == 0 ? "" : "w/ xforms";
+        return Make("Texture Set", subtitle, std::move(image), nullptr, nullptr, nullptr, nullptr,
+                    1.f, false, transformCount);
     }
 
     static sk_sp<ClipTileRenderer> MakeShader(const char* name, sk_sp<SkImage> image,
                                               sk_sp<SkShader> shader, bool local) {
         return Make("Shader", name, std::move(image), std::move(shader),
-                    nullptr, nullptr, nullptr, 1.f, local);
+                    nullptr, nullptr, nullptr, 1.f, local, 0);
     }
 
     static sk_sp<ClipTileRenderer> MakeColorFilter(const char* name, sk_sp<SkImage> image,
                                                    sk_sp<SkColorFilter> filter) {
         return Make("Color Filter", name, std::move(image), nullptr, std::move(filter), nullptr,
-                    nullptr, 1.f, false);
+                    nullptr, 1.f, false, 0);
     }
 
     static sk_sp<ClipTileRenderer> MakeImageFilter(const char* name, sk_sp<SkImage> image,
                                                    sk_sp<SkImageFilter> filter) {
         return Make("Image Filter", name, std::move(image), nullptr, nullptr, std::move(filter),
-                    nullptr, 1.f, false);
+                    nullptr, 1.f, false, 0);
     }
 
     static sk_sp<ClipTileRenderer> MakeMaskFilter(const char* name, sk_sp<SkImage> image,
                                                   sk_sp<SkMaskFilter> filter) {
         return Make("Mask Filter", name, std::move(image), nullptr, nullptr, nullptr,
-                    std::move(filter), 1.f, false);
+                    std::move(filter), 1.f, false, 0);
     }
 
     static sk_sp<ClipTileRenderer> MakeAlpha(sk_sp<SkImage> image, SkScalar alpha) {
         return Make("Alpha", SkStringPrintf("a = %.2f", alpha).c_str(), std::move(image), nullptr,
-                    nullptr, nullptr, nullptr, alpha, false);
+                    nullptr, nullptr, nullptr, alpha, false, 0);
     }
 
     static sk_sp<ClipTileRenderer> Make(const char* topBanner, const char* bottomBanner,
@@ -677,32 +693,23 @@
                                         sk_sp<SkColorFilter> colorFilter,
                                         sk_sp<SkImageFilter> imageFilter,
                                         sk_sp<SkMaskFilter> maskFilter, SkScalar paintAlpha,
-                                        bool resetAfterEachQuad) {
+                                        bool resetAfterEachQuad, int transformCount) {
         return sk_sp<ClipTileRenderer>(new TextureSetRenderer(topBanner, bottomBanner,
                 std::move(image), std::move(shader), std::move(colorFilter), std::move(imageFilter),
-                std::move(maskFilter), paintAlpha, resetAfterEachQuad));
+                std::move(maskFilter), paintAlpha, resetAfterEachQuad, transformCount));
     }
 
-    void drawTiles(SkCanvas* canvas, GrContext* ctx, GrRenderTargetContext* rtc) override {
+    int drawTiles(SkCanvas* canvas, GrContext* ctx, GrRenderTargetContext* rtc) override {
         SkASSERT(fImage); // initImage should be called before any drawing
-        this->INHERITED::drawTiles(canvas, ctx, rtc);
+        int draws = this->INHERITED::drawTiles(canvas, ctx, rtc);
         // Push the last tile set
-        this->drawAndReset(canvas);
+        draws += this->drawAndReset(canvas);
+        return draws;
     }
 
-    void drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
+    int drawTile(SkCanvas* canvas, const SkRect& rect, const SkPoint clip[4], const bool edgeAA[4],
                   int tileID, int quadID) override {
-        // Submit the last batch if we've moved on to a new tile
-        if (tileID != fCurrentTileID) {
-            this->drawAndReset(canvas);
-        }
-        SkASSERT((fCurrentTileID < 0 && fDstClips.count() == 0 && fDstClipCounts.count() == 0 &&
-                  fSetEntries.count() == 0) ||
-                 (fCurrentTileID == tileID && fSetEntries.count() > 0));
-
         // Now don't actually draw the tile, accumulate it in the growing entry set
-        fCurrentTileID = tileID;
-
         int clipCount = 0;
         if (clip) {
             // Record the four points into fDstClips
@@ -710,6 +717,30 @@
             fDstClips.push_back_n(4, clip);
         }
 
+        int preViewIdx = -1;
+        if (!fResetEachQuad && fTransformBatchCount > 0) {
+            // Handle transform batching. This works by capturing the CTM of the first tile draw,
+            // and then calculating the difference between that and future CTMs for later tiles.
+            if (fPreViewXforms.count() == 0) {
+                fBaseCTM = canvas->getTotalMatrix();
+                fPreViewXforms.push_back(SkMatrix::I());
+                preViewIdx = 0;
+            } else {
+                // Calculate matrix s.t. getTotalMatrix() = fBaseCTM * M
+                SkMatrix invBase;
+                if (!fBaseCTM.invert(&invBase)) {
+                    SkDebugf("Cannot invert CTM, transform batching will not be correct.\n");
+                } else {
+                    SkMatrix preView = SkMatrix::Concat(invBase, canvas->getTotalMatrix());
+                    if (preView != fPreViewXforms[fPreViewXforms.count() - 1]) {
+                        // Add the new matrix
+                        fPreViewXforms.push_back(preView);
+                    } // else re-use the last matrix
+                    preViewIdx = fPreViewXforms.count() - 1;
+                }
+            }
+        }
+
         // This acts like the whole image is rendered over the entire tile grid, so derive local
         // coordinates from 'rect', based on the grid to image transform.
         SkMatrix gridToImage = SkMatrix::MakeRectToRect(SkRect::MakeWH(kColCount * kTileWidth,
@@ -723,15 +754,17 @@
         // is not null.
         fSetEntries.push_back({fImage, localRect, rect, 1.f, this->maskToFlags(edgeAA)});
         fDstClipCounts.push_back(clipCount);
+        fPreViewIdx.push_back(preViewIdx);
 
         if (fResetEachQuad) {
             // Only ever draw one entry at a time
-            this->drawAndReset(canvas);
+            return this->drawAndReset(canvas);
+        } else {
+            return 0;
         }
     }
 
     void drawBanner(SkCanvas* canvas) override {
-        canvas->save();
         if (fTopBanner.size() > 0) {
             draw_text(canvas, fTopBanner.c_str());
         }
@@ -739,7 +772,6 @@
         if (fBottomBanner.size() > 0) {
             draw_text(canvas, fBottomBanner.c_str());
         }
-        canvas->restore();
     }
 
 private:
@@ -752,13 +784,20 @@
     sk_sp<SkImageFilter> fImageFilter;
     sk_sp<SkMaskFilter> fMaskFilter;
     SkScalar fPaintAlpha;
+
+    // Batching rules
     bool fResetEachQuad;
+    int fTransformBatchCount;
 
     SkTArray<SkPoint> fDstClips;
-    // ImageSetEntry does not yet have a fDstClipCount field
+    SkTArray<SkMatrix> fPreViewXforms;
+    // ImageSetEntry does not yet have a fDstClipCount or fPreViewIdx field
     SkTArray<int> fDstClipCounts;
+    SkTArray<int> fPreViewIdx;
     SkTArray<SkCanvas::ImageSetEntry> fSetEntries;
-    int fCurrentTileID;
+
+    SkMatrix fBaseCTM;
+    int fBatchCount;
 
     TextureSetRenderer(const char* topBanner,
                        const char* bottomBanner,
@@ -768,19 +807,23 @@
                        sk_sp<SkImageFilter> imageFilter,
                        sk_sp<SkMaskFilter> maskFilter,
                        SkScalar paintAlpha,
-                       bool resetEachQuad)
-        : fTopBanner(topBanner)
-        , fBottomBanner(bottomBanner)
-        , fImage(std::move(image))
-        , fShader(std::move(shader))
-        , fColorFilter(std::move(colorFilter))
-        , fImageFilter(std::move(imageFilter))
-        , fMaskFilter(std::move(maskFilter))
-        , fPaintAlpha(paintAlpha)
-        , fResetEachQuad(resetEachQuad)
-        , fCurrentTileID(-1) {}
+                       bool resetEachQuad,
+                       int transformBatchCount)
+            : fTopBanner(topBanner)
+            , fBottomBanner(bottomBanner)
+            , fImage(std::move(image))
+            , fShader(std::move(shader))
+            , fColorFilter(std::move(colorFilter))
+            , fImageFilter(std::move(imageFilter))
+            , fMaskFilter(std::move(maskFilter))
+            , fPaintAlpha(paintAlpha)
+            , fResetEachQuad(resetEachQuad)
+            , fTransformBatchCount(transformBatchCount)
+            , fBatchCount(0) {
+        SkASSERT(transformBatchCount >= 0 && (!resetEachQuad || transformBatchCount == 0));
+    }
 
-    void configureTilePaint(const SkRect& rect, int tileID, SkPaint* paint) const {
+    void configureTilePaint(const SkRect& rect, SkPaint* paint) const {
         paint->setAntiAlias(true);
         paint->setFilterQuality(kLow_SkFilterQuality);
         paint->setBlendMode(SkBlendMode::kSrcOver);
@@ -806,37 +849,58 @@
         paint->setMaskFilter(fMaskFilter);
     }
 
-    void drawAndReset(SkCanvas* canvas) {
+    int drawAndReset(SkCanvas* canvas) {
         // Early out if there's nothing to draw
         if (fSetEntries.count() == 0) {
-            SkASSERT(fCurrentTileID < 0 && fDstClips.count() == 0 && fDstClipCounts.count() == 0);
-            return;
+            SkASSERT(fDstClips.count() == 0 && fPreViewXforms.count() == 0 &&
+                     fDstClipCounts.count() == 0 && fPreViewIdx.count() == 0);
+            return 0;
         }
 
-        // NOTE: Eventually fDstClipCounts will just be stored as a field on each entry
+        if (!fResetEachQuad && fTransformBatchCount > 0) {
+            // A batch is completed
+            fBatchCount++;
+            if (fBatchCount < fTransformBatchCount) {
+                // Haven't hit the point to submit yet, but end the current tile
+                return 0;
+            }
+
+            // All entries are submitted relative to fBaseCTM, but the canvas currently has the
+            // CTM of the last tile batch, so reset its matrix to fBaseCTM first.
+            canvas->setMatrix(fBaseCTM);
+        }
+
+        // NOTE: Eventually these will just be stored as fields on each entry
         SkASSERT(fDstClipCounts.count() == fSetEntries.count());
+        SkASSERT(fPreViewIdx.count() == fSetEntries.count());
 
 #ifdef SK_DEBUG
         int expectedDstClipCount = 0;
         for (int i = 0; i < fDstClipCounts.count(); ++i) {
             expectedDstClipCount += fDstClipCounts[i];
+            SkASSERT(fPreViewIdx[i] < 0 || fPreViewIdx[i] < fPreViewXforms.count());
         }
         SkASSERT(expectedDstClipCount == fDstClips.count());
 #endif
 
         SkPaint paint;
         SkRect lastTileRect = fSetEntries[fSetEntries.count() - 1].fDstRect;
-        this->configureTilePaint(lastTileRect, fCurrentTileID, &paint);
+        this->configureTilePaint(lastTileRect, &paint);
 
-        fDevice->tmp_drawImageSetV2(fSetEntries.begin(), fDstClipCounts.begin(),
-                                    fSetEntries.count(), fDstClips.begin(), paint,
-                                    SkCanvas::kFast_SrcRectConstraint);
+        fDevice->tmp_drawImageSetV3(fSetEntries.begin(), fDstClipCounts.begin(),
+                                    fPreViewIdx.begin(), fSetEntries.count(),
+                                    fDstClips.begin(), fPreViewXforms.begin(),
+                                    paint, SkCanvas::kFast_SrcRectConstraint);
 
         // Reset for next tile
-        fCurrentTileID = -1;
         fDstClips.reset();
         fDstClipCounts.reset();
+        fPreViewXforms.reset();
+        fPreViewIdx.reset();
         fSetEntries.reset();
+        fBatchCount = 0;
+
+        return 1;
     }
 
     typedef ClipTileRenderer INHERITED;
@@ -872,7 +936,8 @@
     sk_sp<SkImage> mandrill = GetResourceAsImage("images/mandrill_512.png");
     SkTArray<sk_sp<ClipTileRenderer>> renderers;
     renderers.push_back(TextureSetRenderer::MakeUnbatched(mandrill));
-    renderers.push_back(TextureSetRenderer::MakeBatched(mandrill));
+    renderers.push_back(TextureSetRenderer::MakeBatched(mandrill, 0));
+    renderers.push_back(TextureSetRenderer::MakeBatched(mandrill, kMatrixCount));
     return renderers;
 }
 
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index 3c426c4..a148cef 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -1068,14 +1068,20 @@
     if (mode != SkBlendMode::kSrcOver ||
         !fContext->priv().caps()->dynamicStateArrayGeometryProcessorTextureSupport()) {
         // Draw one at a time with GrFillRectOp and a GrPaint that emulates what GrTextureOp does
+        SkMatrix ctm;
         for (int i = 0; i < cnt; ++i) {
             float alpha = set[i].fAlpha;
+            ctm = viewMatrix;
+            if (set[i].fPreViewMatrix) {
+                ctm.preConcat(*set[i].fPreViewMatrix);
+            }
+
             if (set[i].fDstClipQuad == nullptr) {
                 // Stick with original rectangles, which allows the ops to know more about what's
                 // being drawn.
                 this->drawTexture(clip, set[i].fProxy, filter, mode, {alpha, alpha, alpha, alpha},
                                   set[i].fSrcRect, set[i].fDstRect, aa, set[i].fAAFlags,
-                                  SkCanvas::kFast_SrcRectConstraint, viewMatrix, texXform);
+                                  SkCanvas::kFast_SrcRectConstraint, ctm, texXform);
             } else {
                 // Generate interpolated texture coordinates to match the dst clip
                 SkPoint srcQuad[4];
@@ -1085,7 +1091,7 @@
                 // keep seams look more correct.
                 this->drawTextureQuad(clip, set[i].fProxy, filter, mode,
                                       {alpha, alpha, alpha, alpha}, srcQuad, set[i].fDstClipQuad,
-                                      aa, set[i].fAAFlags, nullptr, viewMatrix, texXform);
+                                      aa, set[i].fAAFlags, nullptr, ctm, texXform);
             }
         }
     } else {
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index 1d2f7a5..4691e94 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -197,6 +197,7 @@
         SkRect fSrcRect;
         SkRect fDstRect;
         const SkPoint* fDstClipQuad; // Must be null, or point to an array of 4 points
+        const SkMatrix* fPreViewMatrix; // If not null, entry's CTM is 'viewMatrix' * fPreViewMatrix
         float fAlpha;
         GrQuadAAFlags fAAFlags;
     };
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 1dc594b..eb8b84a 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1298,8 +1298,8 @@
                                 const SkPaint& paint, SkCanvas::SrcRectConstraint constraint) {
     ASSERT_SINGLE_OWNER
     GrQuadAAFlags aaFlags = paint.isAntiAlias() ? GrQuadAAFlags::kAll : GrQuadAAFlags::kNone;
-    this->drawImageQuad(
-            image, src, &dst, nullptr, GrAA(paint.isAntiAlias()), aaFlags, paint, constraint);
+    this->drawImageQuad(image, src, &dst, nullptr, GrAA(paint.isAntiAlias()), aaFlags, nullptr,
+                        paint, constraint);
 }
 
 // When drawing nine-patches or n-patches, cap the filter quality at kBilerp.
@@ -1410,7 +1410,7 @@
     paint.setBlendMode(mode);
     paint.setFilterQuality(filterQuality);
     paint.setAntiAlias(true);
-    this->tmp_drawImageSetV2(set, nullptr, count, nullptr, paint,
+    this->tmp_drawImageSetV3(set, nullptr, nullptr, count, nullptr, nullptr, paint,
                              SkCanvas::kFast_SrcRectConstraint);
 }
 
diff --git a/src/gpu/SkGpuDevice.h b/src/gpu/SkGpuDevice.h
index 9d60096..b41b8c8 100644
--- a/src/gpu/SkGpuDevice.h
+++ b/src/gpu/SkGpuDevice.h
@@ -130,10 +130,12 @@
 
     /*
      * dstClipCounts[] is a parallel array to the image entries, acting like the intended
-     * dstClipCount field in ImageSetEntry.
+     * dstClipCount field in ImageSetEntry. Similarly, preViewMatrixIdx is parallel and will
+     * become an index field in ImageSetEntry that specifies an entry in the matrix array.
      */
-    void tmp_drawImageSetV2(const SkCanvas::ImageSetEntry[], int dstClipCounts[], int count,
-            const SkPoint dstClips[], const SkPaint& paint,
+    void tmp_drawImageSetV3(const SkCanvas::ImageSetEntry[],
+            int dstClipCounts[], int preViewMatrixIdx[], int count,
+            const SkPoint dstClips[], const SkMatrix preViewMatrices[], const SkPaint& paint,
             SkCanvas::SrcRectConstraint constraint = SkCanvas::kStrict_SrcRectConstraint);
     void tmp_drawEdgeAAQuad(const SkRect& rect, const SkPoint clip[], int clipCount,
                             SkCanvas::QuadAAFlags aaFlags, SkColor color, SkBlendMode mode);
@@ -219,10 +221,11 @@
                         bool bicubic,
                         bool needsTextureDomain);
 
-    // If not null, dstClip must be contained inside dst and will also respect the edge AA flags
+    // If not null, dstClip must be contained inside dst and will also respect the edge AA flags.
+    // If 'preViewMatrix' is not null, final CTM will be this->ctm() * preViewMatrix.
     void drawImageQuad(const SkImage*, const SkRect* src, const SkRect* dst,
                        const SkPoint dstClip[4], GrAA aa, GrQuadAAFlags aaFlags,
-                       const SkPaint&, SkCanvas::SrcRectConstraint);
+                       const SkMatrix* preViewMatrix, const SkPaint&, SkCanvas::SrcRectConstraint);
 
     // TODO(michaelludwig): This can be removed once drawBitmapRect is removed from SkDevice
     // so that drawImageQuad is the sole entry point into the draw-single-image op
diff --git a/src/gpu/SkGpuDevice_drawTexture.cpp b/src/gpu/SkGpuDevice_drawTexture.cpp
index 5b50506..8c280d5 100644
--- a/src/gpu/SkGpuDevice_drawTexture.cpp
+++ b/src/gpu/SkGpuDevice_drawTexture.cpp
@@ -355,7 +355,8 @@
 
 void SkGpuDevice::drawImageQuad(const SkImage* image, const SkRect* srcRect, const SkRect* dstRect,
                                 const SkPoint dstClip[4], GrAA aa, GrQuadAAFlags aaFlags,
-                                const SkPaint& paint, SkCanvas::SrcRectConstraint constraint) {
+                                const SkMatrix* preViewMatrix, const SkPaint& paint,
+                                SkCanvas::SrcRectConstraint constraint) {
     SkRect src;
     SkRect dst;
     SkMatrix srcToDst;
@@ -372,15 +373,21 @@
     bool useDecal = mode == ImageDrawMode::kDecal;
     bool attemptDrawTexture = !useDecal; // rtc->drawTexture() only clamps
 
+    // Compute the final CTM, pre-concatenating the optional per-entry matrix
+    SkMatrix ctm = this->ctm();
+    if (preViewMatrix) {
+        ctm.preConcat(*preViewMatrix);
+    }
+
     // YUVA images can be stored in multiple images with different plane resolutions, so this
     // uses an effect to combine them dynamically on the GPU. This is done before requesting a
     // pinned texture proxy because YUV images force-flatten to RGBA in that scenario.
     if (as_IB(image)->isYUVA()) {
         SK_HISTOGRAM_BOOLEAN("DrawTiled", false);
-        LogDrawScaleFactor(this->ctm(), srcToDst, paint.getFilterQuality());
+        LogDrawScaleFactor(ctm, srcToDst, paint.getFilterQuality());
 
         GrYUVAImageTextureMaker maker(fContext.get(), image, useDecal);
-        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), this->ctm(),
+        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), ctm,
                               paint, &maker, src, dst, dstClip, srcToDst, aa, aaFlags, constraint,
                               /* attempt draw texture */ false);
         return;
@@ -392,20 +399,20 @@
     if (sk_sp<GrTextureProxy> proxy = as_IB(image)->refPinnedTextureProxy(this->context(),
                                                                           &pinnedUniqueID)) {
         SK_HISTOGRAM_BOOLEAN("DrawTiled", false);
-        LogDrawScaleFactor(this->ctm(), srcToDst, paint.getFilterQuality());
+        LogDrawScaleFactor(ctm, srcToDst, paint.getFilterQuality());
 
         SkAlphaType alphaType = image->alphaType();
         SkColorSpace* colorSpace = as_IB(image)->colorSpace();
 
         if (attemptDrawTexture && can_use_draw_texture(paint)) {
-            draw_texture(fRenderTargetContext.get(), this->clip(), this->ctm(), paint, src,  dst,
+            draw_texture(fRenderTargetContext.get(), this->clip(), ctm, paint, src,  dst,
                          dstClip, aa, aaFlags, constraint, std::move(proxy), alphaType, colorSpace);
             return;
         }
 
         GrTextureAdjuster adjuster(fContext.get(), std::move(proxy), alphaType, pinnedUniqueID,
                                    colorSpace, useDecal);
-        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), this->ctm(),
+        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), ctm,
                               paint, &adjuster, src, dst, dstClip, srcToDst, aa, aaFlags,
                               constraint, /* attempt draw_texture */ false);
         return;
@@ -415,8 +422,7 @@
     // TODO (michaelludwig): Implement this with per-edge AA flags to handle seaming properly
     // instead of going through drawBitmapRect (which will be removed from SkDevice in the future)
     SkBitmap bm;
-    if (this->shouldTileImage(image, &src, constraint, paint.getFilterQuality(), this->ctm(),
-                              srcToDst)) {
+    if (this->shouldTileImage(image, &src, constraint, paint.getFilterQuality(), ctm, srcToDst)) {
         // only support tiling as bitmap at the moment, so force raster-version
         if (!as_IB(image)->getROPixels(&bm)) {
             return;
@@ -427,20 +433,20 @@
 
     // This is the funnel for all non-tiled bitmap/image draw calls. Log a histogram entry.
     SK_HISTOGRAM_BOOLEAN("DrawTiled", false);
-    LogDrawScaleFactor(this->ctm(), srcToDst, paint.getFilterQuality());
+    LogDrawScaleFactor(ctm, srcToDst, paint.getFilterQuality());
 
     // Lazily generated images must get drawn as a texture producer that handles the final
     // texture creation.
     if (image->isLazyGenerated()) {
         GrImageTextureMaker maker(fContext.get(), image, SkImage::kAllow_CachingHint, useDecal);
-        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), this->ctm(),
+        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), ctm,
                               paint, &maker, src, dst, dstClip, srcToDst, aa, aaFlags, constraint,
                               attemptDrawTexture);
         return;
     }
     if (as_IB(image)->getROPixels(&bm)) {
         GrBitmapTextureMaker maker(fContext.get(), bm, useDecal);
-        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), this->ctm(),
+        draw_texture_producer(fContext.get(), fRenderTargetContext.get(), this->clip(), ctm,
                               paint, &maker, src, dst, dstClip, srcToDst, aa, aaFlags, constraint,
                               attemptDrawTexture);
     }
@@ -450,8 +456,9 @@
 
 // For ease-of-use, the temporary API treats null dstClipCounts as if it were the proper sized
 // array, filled with all 0s (so dstClips can be null too)
-void SkGpuDevice::tmp_drawImageSetV2(const SkCanvas::ImageSetEntry set[], int dstClipCounts[],
-                                     int count, const SkPoint dstClips[], const SkPaint& paint,
+void SkGpuDevice::tmp_drawImageSetV3(const SkCanvas::ImageSetEntry set[], int dstClipCounts[],
+                                     int preViewMatrixIdx[], int count, const SkPoint dstClips[],
+                                     const SkMatrix preViewMatrices[], const SkPaint& paint,
                                      SkCanvas::SrcRectConstraint constraint) {
     SkASSERT(count > 0);
 
@@ -461,10 +468,15 @@
         for (int i = 0; i < count; ++i) {
             // Only no clip or quad clip are supported
             SkASSERT(!dstClipCounts || dstClipCounts[i] == 0 || dstClipCounts[i] == 4);
+
+            int xform = preViewMatrixIdx ? preViewMatrixIdx[i] : -1;
+            SkASSERT(xform < 0 || preViewMatrices);
+
             // Always send GrAA::kYes to preserve seaming across tiling in MSAA
             this->drawImageQuad(set[i].fImage.get(), &set[i].fSrcRect, &set[i].fDstRect,
                     (dstClipCounts && dstClipCounts[i] > 0) ? dstClips + dstClipIndex : nullptr,
-                    GrAA::kYes, SkToGrQuadAAFlags(set[i].fAAFlags), paint, constraint);
+                    GrAA::kYes, SkToGrQuadAAFlags(set[i].fAAFlags),
+                    xform < 0 ? nullptr : preViewMatrices + xform, paint, constraint);
             if (dstClipCounts) {
                 dstClipIndex += dstClipCounts[i];
             }
@@ -531,9 +543,14 @@
                 continue;
             }
         }
+
+        int xform = preViewMatrixIdx ? preViewMatrixIdx[i] : -1;
+        SkASSERT(xform < 0 || preViewMatrices);
+
         textures[i].fSrcRect = set[i].fSrcRect;
         textures[i].fDstRect = set[i].fDstRect;
         textures[i].fDstClipQuad = clip;
+        textures[i].fPreViewMatrix = xform < 0 ? nullptr : preViewMatrices + xform;
         textures[i].fAlpha = set[i].fAlpha * paint.getAlphaf();
         textures[i].fAAFlags = SkToGrQuadAAFlags(set[i].fAAFlags);
 
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 210dfee..9db287a 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -307,10 +307,10 @@
         GrAAType overallAAType = GrAAType::kNone; // aa type maximally compatible with all dst rects
         bool mustFilter = false;
         fCanSkipAllocatorGather = static_cast<unsigned>(true);
-        // All dst rects are transformed by the same view matrix, so their quad types are identical,
-        // unless an entry provides a dstClip that forces quad type to be at least standard.
-        GrQuadType baseQuadType = GrQuadTypeForTransformedRect(viewMatrix);
-        fQuads.reserve(cnt, baseQuadType);
+        // Most dst rects are transformed by the same view matrix, so their quad types start out
+        // identical; an entry that provides a dstClip or its own pre-view matrix may change its
+        // type, and the quad list will automatically adapt to that.
+        fQuads.reserve(cnt, GrQuadTypeForTransformedRect(viewMatrix));
 
         for (unsigned p = 0; p < fProxyCnt; ++p) {
             fProxies[p].fProxy = SkRef(set[p].fProxy.get());
@@ -321,12 +321,17 @@
                 fCanSkipAllocatorGather = static_cast<unsigned>(false);
             }
 
+            SkMatrix ctm = viewMatrix;
+            if (set[p].fPreViewMatrix) {
+                ctm.preConcat(*set[p].fPreViewMatrix);
+            }
+
             // Use dstRect unless dstClip is provided, which is assumed to be a quad
             auto quad = set[p].fDstClipQuad == nullptr ?
-                    GrPerspQuad::MakeFromRect(set[p].fDstRect, viewMatrix) :
-                    GrPerspQuad::MakeFromSkQuad(set[p].fDstClipQuad, viewMatrix);
-            GrQuadType quadType = baseQuadType;
-            if (set[p].fDstClipQuad && baseQuadType != GrQuadType::kPerspective) {
+                    GrPerspQuad::MakeFromRect(set[p].fDstRect, ctm) :
+                    GrPerspQuad::MakeFromSkQuad(set[p].fDstClipQuad, ctm);
+            GrQuadType quadType = GrQuadTypeForTransformedRect(ctm);
+            if (set[p].fDstClipQuad && quadType != GrQuadType::kPerspective) {
                 quadType = GrQuadType::kStandard;
             }
 
@@ -342,7 +347,7 @@
             }
             if (!mustFilter && this->filter() != GrSamplerState::Filter::kNearest) {
                 mustFilter = quadType != GrQuadType::kRect ||
-                             GrTextureOp::GetFilterHasEffect(viewMatrix, set[p].fSrcRect,
+                             GrTextureOp::GetFilterHasEffect(ctm, set[p].fSrcRect,
                                                              set[p].fDstRect);
             }
             float alpha = SkTPin(set[p].fAlpha, 0.f, 1.f);