Revert "Remove support for copyAsDraw in gpu copySurface."

This reverts commit 6565506463db042d3d543a1707f473cdf1ef4e9e.

Reason for revert: seems to break things?

Original change's description:
> Remove support for copyAsDraw in gpu copySurface.
> 
> The major changes on a higher level are:
> 1) The majority of all copies now go through GrSurfaceProxy::Copy which
> takes in a proxy and returns a new one with the data copied to it. This
> is the most common use case within Ganesh.
> 
> 2) The backend copy calls no longer do draws, require origins to be the
> same, and won't do any swizzling or adjustment of subrects. They are
> all implemented as a dumb "copy this data to this other spot" operation.
> 
> 3) The GrSurfaceContext copy call has now been moved to priv and renamed
> copyNoDraw, and a new priv copyAsDraw was added to GrRenderTargetContext.
> 
> 4) WritePixels and ReplaceRenderTarget both need to specify the destination
> of copies. They are the only users (besides the GrSurfaceProxy::Copy) which
> call the priv methods on GrSurfaceContext.
> 
> Change-Id: Iaf1eb3a73ccaf39a75af77e281dae594f809186f
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/217459
> Reviewed-by: Brian Salomon <bsalomon@google.com>
> Commit-Queue: Greg Daniel <egdaniel@google.com>

TBR=egdaniel@google.com,bsalomon@google.com,robertphillips@google.com

Change-Id: Id43aa8aa1451e794342e930441d9975b90e6b59f
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/218549
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/GrBackendTextureImageGenerator.cpp b/src/gpu/GrBackendTextureImageGenerator.cpp
index 6320809..4df0bd4 100644
--- a/src/gpu/GrBackendTextureImageGenerator.cpp
+++ b/src/gpu/GrBackendTextureImageGenerator.cpp
@@ -208,10 +208,28 @@
         // because Vulkan will want to do the copy as a draw. All other copies would require a
         // layout change in Vulkan and we do not change the layout of borrowed images.
         GrMipMapped mipMapped = willNeedMipMaps ? GrMipMapped::kYes : GrMipMapped::kNo;
-        SkIRect subset = SkIRect::MakeXYWH(origin.fX, origin.fY, info.width(), info.height());
 
-        return GrSurfaceProxy::Copy(context, proxy.get(), mipMapped, subset, SkBackingFit::kExact,
-                                    SkBudgeted::kYes);
+        GrBackendFormat format = proxy->backendFormat().makeTexture2D();
+        if (!format.isValid()) {
+            return nullptr;
+        }
+
+        sk_sp<GrRenderTargetContext> rtContext(
+            context->priv().makeDeferredRenderTargetContext(
+                format, SkBackingFit::kExact, info.width(), info.height(),
+                proxy->config(), nullptr, 1, mipMapped, proxy->origin(), nullptr,
+                SkBudgeted::kYes));
+
+        if (!rtContext) {
+            return nullptr;
+        }
+
+        SkIRect subset = SkIRect::MakeXYWH(origin.fX, origin.fY, info.width(), info.height());
+        if (!rtContext->copy(proxy.get(), subset, SkIPoint::Make(0, 0))) {
+            return nullptr;
+        }
+
+        return rtContext->asTextureProxyRef();
     }
 }
 
diff --git a/src/gpu/GrCaps.h b/src/gpu/GrCaps.h
index 42d6e76..3c9884e 100644
--- a/src/gpu/GrCaps.h
+++ b/src/gpu/GrCaps.h
@@ -310,7 +310,8 @@
      * copy rect must equal src's bounds.
      */
     virtual bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
-                                    bool* rectsMustMatch, bool* disallowSubrect) const = 0;
+                                    GrSurfaceOrigin* origin, bool* rectsMustMatch,
+                                    bool* disallowSubrect) const = 0;
 
     bool validateSurfaceDesc(const GrSurfaceDesc&, GrMipMapped) const;
 
diff --git a/src/gpu/GrContextPriv.cpp b/src/gpu/GrContextPriv.cpp
index a311de0..7945192 100644
--- a/src/gpu/GrContextPriv.cpp
+++ b/src/gpu/GrContextPriv.cpp
@@ -16,7 +16,6 @@
 #include "src/gpu/GrMemoryPool.h"
 #include "src/gpu/GrRenderTargetContext.h"
 #include "src/gpu/GrSurfacePriv.h"
-#include "src/gpu/GrSurfaceContextPriv.h"
 #include "src/gpu/GrTextureContext.h"
 #include "src/gpu/SkGr.h"
 #include "src/gpu/effects/generated/GrConfigConversionEffect.h"
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 7bbedfc..ce151b1 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -222,8 +222,10 @@
     return buffer;
 }
 
-bool GrGpu::copySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                        const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) {
+bool GrGpu::copySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                        GrSurface* src, GrSurfaceOrigin srcOrigin,
+                        const SkIRect& srcRect, const SkIPoint& dstPoint,
+                        bool canDiscardOutsideDstRect) {
     GR_CREATE_TRACE_MARKER_CONTEXT("GrGpu", "copySurface", fContext);
     SkASSERT(dst && src);
 
@@ -233,7 +235,8 @@
 
     this->handleDirtyContext();
 
-    return this->onCopySurface(dst, src, srcRect, dstPoint, canDiscardOutsideDstRect);
+    return this->onCopySurface(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint,
+                               canDiscardOutsideDstRect);
 }
 
 bool GrGpu::readPixels(GrSurface* surface, int left, int top, int width, int height,
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 62d435e..467e1db 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -253,11 +253,13 @@
     // Called to perform a surface to surface copy. Fallbacks to issuing a draw from the src to dst
     // take place at the GrOpList level and this function implement faster copy paths. The rect
     // and point are pre-clipped. The src rect and implied dst rect are guaranteed to be within the
-    // src/dst bounds and non-empty. They must also be in their exact device space coords, including
-    // already being transformed for origin if need be. If canDiscardOutsideDstRect is set to true
-    // then we don't need to preserve any data on the dst surface outside of the copy.
-    bool copySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                     const SkIPoint& dstPoint, bool canDiscardOutsideDstRect = false);
+    // src/dst bounds and non-empty. If canDiscardOutsideDstRect is set to true then we don't need
+    // to preserve any data on the dst surface outside of the copy.
+    bool copySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                     GrSurface* src, GrSurfaceOrigin srcOrigin,
+                     const SkIRect& srcRect,
+                     const SkIPoint& dstPoint,
+                     bool canDiscardOutsideDstRect = false);
 
     // Queries the per-pixel HW sample locations for the given render target, and then finds or
     // assigns a key that uniquely identifies the sample pattern. The actual sample locations can be
@@ -535,8 +537,10 @@
     virtual bool onRegenerateMipMapLevels(GrTexture*) = 0;
 
     // overridden by backend specific derived class to perform the copy surface
-    virtual bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                               const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) = 0;
+    virtual bool onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                               GrSurface* src, GrSurfaceOrigin srcOrigin,
+                               const SkIRect& srcRect, const SkIPoint& dstPoint,
+                               bool canDiscardOutsideDstRect) = 0;
 
     virtual void onFinishFlush(GrSurfaceProxy*[], int n, SkSurface::BackendSurfaceAccess access,
                                const GrFlushInfo&, const GrPrepareForExternalIORequests&) = 0;
diff --git a/src/gpu/GrGpuCommandBuffer.h b/src/gpu/GrGpuCommandBuffer.h
index e96c610..abb3c4d 100644
--- a/src/gpu/GrGpuCommandBuffer.h
+++ b/src/gpu/GrGpuCommandBuffer.h
@@ -30,9 +30,9 @@
     virtual ~GrGpuCommandBuffer() {}
 
     // Copy src into current surface owned by either a GrGpuTextureCommandBuffer or
-    // GrGpuRenderTargetCommandBuffer. The srcRect and dstPoint must be in dst coords and have
-    // already been adjusted for any origin flips.
-    virtual void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) = 0;
+    // GrGpuRenderTargetCommandBuffer.
+    virtual void copy(GrSurface* src, GrSurfaceOrigin srcOrigin,
+                      const SkIRect& srcRect, const SkIPoint& dstPoint) = 0;
     // Initiates a transfer from the surface owned by the command buffer to the GrGpuBuffer.
     virtual void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                               GrGpuBuffer* transferBuffer, size_t offset) = 0;
@@ -47,14 +47,19 @@
     void set(GrTexture* texture, GrSurfaceOrigin origin) {
         SkASSERT(!fTexture);
 
+        fOrigin = origin;
         fTexture = texture;
     }
 
 protected:
-    GrGpuTextureCommandBuffer() : fTexture(nullptr) {}
+    GrGpuTextureCommandBuffer() : fOrigin(kTopLeft_GrSurfaceOrigin), fTexture(nullptr) {}
 
-    GrGpuTextureCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin) : fTexture(texture) {}
+    GrGpuTextureCommandBuffer(GrTexture* texture, GrSurfaceOrigin origin)
+            : fOrigin(origin)
+            , fTexture(texture) {
+    }
 
+    GrSurfaceOrigin fOrigin;
     GrTexture*      fTexture;
 
 private:
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index d44da83..6b77d30 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -1785,17 +1785,27 @@
     int srcH = srcRect.height();
     int srcX = srcRect.fLeft;
     int srcY = srcRect.fTop;
-    sk_sp<GrTextureProxy> texProxy = sk_ref_sp(fRenderTargetProxy->asTextureProxy());
+    sk_sp<GrSurfaceContext> srcContext = sk_ref_sp(this);
     SkCanvas::SrcRectConstraint constraint = SkCanvas::kStrict_SrcRectConstraint;
-    if (!texProxy) {
-        texProxy = GrSurfaceProxy::Copy(fContext, fRenderTargetProxy.get(), GrMipMapped::kNo,
-                                        srcRect, SkBackingFit::kApprox, SkBudgeted::kNo);
-        if (!texProxy) {
+    if (!this->asTextureProxy()) {
+        GrSurfaceDesc desc;
+        desc.fWidth = srcW;
+        desc.fHeight = srcH;
+        desc.fConfig = fRenderTargetProxy->config();
+        auto sContext = direct->priv().makeDeferredSurfaceContext(
+                fRenderTargetProxy->backendFormat().makeTexture2D(), desc, this->origin(),
+                GrMipMapped::kNo, SkBackingFit::kApprox, SkBudgeted::kNo,
+                this->colorSpaceInfo().refColorSpace());
+        if (!sContext) {
+            return nullptr;
+        }
+        if (!sContext->copy(fRenderTargetProxy.get(), srcRect, {0, 0})) {
             return nullptr;
         }
         srcX = 0;
         srcY = 0;
         constraint = SkCanvas::kFast_SrcRectConstraint;
+        srcContext = std::move(sContext);
     }
 
     float sx = (float)info.width() / srcW;
@@ -1814,35 +1824,33 @@
         stepsY = sy != 1.f;
     }
     SkASSERT(stepsX || stepsY);
-    auto currentColorSpace = this->colorSpaceInfo().refColorSpace();
     // Assume we should ignore the rescale linear request if the surface has no color space since
     // it's unclear how we'd linearize from an unknown color space.
     if (rescaleGamma == SkSurface::RescaleGamma::kLinear &&
-        currentColorSpace.get() && !currentColorSpace->gammaIsLinear()) {
-        auto cs = currentColorSpace->makeLinearGamma();
+        srcContext->colorSpaceInfo().colorSpace() &&
+        !srcContext->colorSpaceInfo().colorSpace()->gammaIsLinear()) {
+        auto cs = srcContext->colorSpaceInfo().colorSpace()->makeLinearGamma();
         auto backendFormat = this->caps()->getBackendFormatFromGrColorType(GrColorType::kRGBA_F16,
                                                                            GrSRGBEncoded::kNo);
-        auto xform = GrColorSpaceXform::Make(currentColorSpace.get(), kPremul_SkAlphaType, cs.get(),
-                                             kPremul_SkAlphaType);
+        auto xform = GrColorSpaceXform::Make(srcContext->colorSpaceInfo().colorSpace(),
+                                             kPremul_SkAlphaType, cs.get(), kPremul_SkAlphaType);
         // We'll fall back to kRGBA_8888 if half float not supported.
         auto linearRTC = fContext->priv().makeDeferredRenderTargetContextWithFallback(
-                backendFormat, SkBackingFit::kExact, srcW, srcH, kRGBA_half_GrPixelConfig, cs, 1,
-                GrMipMapped::kNo, kTopLeft_GrSurfaceOrigin);
+                backendFormat, SkBackingFit::kExact, srcW, srcH, kRGBA_half_GrPixelConfig,
+                std::move(cs), 1, GrMipMapped::kNo, kTopLeft_GrSurfaceOrigin);
         if (!linearRTC) {
             return nullptr;
         }
-        linearRTC->drawTexture(GrNoClip(), texProxy,
+        linearRTC->drawTexture(GrNoClip(), srcContext->asTextureProxyRef(),
                                GrSamplerState::Filter::kNearest, SkBlendMode::kSrc,
                                SK_PMColor4fWHITE, SkRect::Make(srcRect), SkRect::MakeWH(srcW, srcH),
                                GrAA::kNo, GrQuadAAFlags::kNone, constraint, SkMatrix::I(),
                                std::move(xform));
-        texProxy = linearRTC->asTextureProxyRef();
-        currentColorSpace = std::move(cs);
+        srcContext = std::move(linearRTC);
         srcX = 0;
         srcY = 0;
         constraint = SkCanvas::kFast_SrcRectConstraint;
     }
-    sk_sp<GrRenderTargetContext> currRTC;
     while (stepsX || stepsY) {
         int nextW = info.width();
         int nextH = info.height();
@@ -1864,22 +1872,23 @@
             }
             --stepsY;
         }
-        GrBackendFormat backendFormat = texProxy->backendFormat().makeTexture2D();
-        GrPixelConfig config = texProxy->config();
-        auto cs = currentColorSpace;
+        GrBackendFormat backendFormat =
+                srcContext->asSurfaceProxy()->backendFormat().makeTexture2D();
+        GrPixelConfig config = srcContext->asSurfaceProxy()->config();
+        auto cs = srcContext->colorSpaceInfo().refColorSpace();
         sk_sp<GrColorSpaceXform> xform;
         if (!stepsX && !stepsY) {
             // Might as well fold conversion to final info in the last step.
             backendFormat = this->caps()->getBackendFormatFromColorType(info.colorType());
             config = this->caps()->getConfigFromBackendFormat(backendFormat, info.colorType());
             cs = info.refColorSpace();
-            xform = GrColorSpaceXform::Make(this->colorSpaceInfo().colorSpace(),
+            xform = GrColorSpaceXform::Make(srcContext->colorSpaceInfo().colorSpace(),
                                             kPremul_SkAlphaType, cs.get(), info.alphaType());
         }
-        currRTC = fContext->priv().makeDeferredRenderTargetContextWithFallback(
+        auto nextRTC = fContext->priv().makeDeferredRenderTargetContextWithFallback(
                 backendFormat, SkBackingFit::kExact, nextW, nextH, config, std::move(cs), 1,
                 GrMipMapped::kNo, kTopLeft_GrSurfaceOrigin);
-        if (!currRTC) {
+        if (!nextRTC) {
             return nullptr;
         }
         auto dstRect = SkRect::MakeWH(nextW, nextH);
@@ -1893,12 +1902,14 @@
             } else if (nextH == srcH) {
                 dir = GrBicubicEffect::Direction::kX;
             }
-            if (srcW != texProxy->width() || srcH != texProxy->height()) {
+            if (srcW != srcContext->width() || srcH != srcContext->height()) {
                 auto domain = GrTextureDomain::MakeTexelDomain(
                         SkIRect::MakeXYWH(srcX, srcY, srcW, srcH), GrTextureDomain::kClamp_Mode);
-                fp = GrBicubicEffect::Make(texProxy, matrix, domain, dir, kPremul_SkAlphaType);
+                fp = GrBicubicEffect::Make(srcContext->asTextureProxyRef(), matrix, domain, dir,
+                                           kPremul_SkAlphaType);
             } else {
-                fp = GrBicubicEffect::Make(texProxy, matrix, dir, kPremul_SkAlphaType);
+                fp = GrBicubicEffect::Make(srcContext->asTextureProxyRef(), matrix, dir,
+                                           kPremul_SkAlphaType);
             }
             if (xform) {
                 fp = GrColorSpaceXformEffect::Make(std::move(fp), std::move(xform));
@@ -1906,24 +1917,26 @@
             GrPaint paint;
             paint.addColorFragmentProcessor(std::move(fp));
             paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
-            currRTC->drawFilledRect(GrNoClip(), std::move(paint), GrAA::kNo, SkMatrix::I(),
+            nextRTC->drawFilledRect(GrNoClip(), std::move(paint), GrAA::kNo, SkMatrix::I(),
                                     dstRect);
         } else {
             auto filter = rescaleQuality == kNone_SkFilterQuality ? GrSamplerState::Filter::kNearest
                                                                   : GrSamplerState::Filter::kBilerp;
             auto srcSubset = SkRect::MakeXYWH(srcX, srcY, srcW, srcH);
-            currRTC->drawTexture(GrNoClip(), texProxy, filter, SkBlendMode::kSrc, SK_PMColor4fWHITE,
-                                 srcSubset, dstRect, GrAA::kNo, GrQuadAAFlags::kNone, constraint,
-                                 SkMatrix::I(), std::move(xform));
+            nextRTC->drawTexture(GrNoClip(), srcContext->asTextureProxyRef(), filter,
+                                 SkBlendMode::kSrc, SK_PMColor4fWHITE, srcSubset, dstRect,
+                                 GrAA::kNo, GrQuadAAFlags::kNone, constraint, SkMatrix::I(),
+                                 std::move(xform));
         }
-        texProxy = currRTC->asTextureProxyRef();
+        srcContext = std::move(nextRTC);
         srcX = srcY = 0;
         srcW = nextW;
         srcH = nextH;
         constraint = SkCanvas::kFast_SrcRectConstraint;
     }
-    SkASSERT(currRTC);
-    return currRTC;
+    auto result = sk_ref_sp(srcContext->asRenderTargetContext());
+    SkASSERT(result);
+    return result;
 }
 
 void GrRenderTargetContext::asyncRescaleAndReadPixels(
@@ -1988,13 +2001,24 @@
             SkRect srcRectToDraw = SkRect::Make(srcRect);
             // If the src is not texturable first try to make a copy to a texture.
             if (!texProxy) {
-                texProxy = GrSurfaceProxy::Copy(fContext, fRenderTargetProxy.get(),
-                                                GrMipMapped::kNo, srcRect, SkBackingFit::kApprox,
-                                                SkBudgeted::kNo);
-                if (!texProxy) {
+                GrSurfaceDesc desc;
+                desc.fWidth = srcRect.width();
+                desc.fHeight = srcRect.height();
+                desc.fConfig = fRenderTargetProxy->config();
+                auto sContext = direct->priv().makeDeferredSurfaceContext(
+                        backendFormat, desc, this->origin(), GrMipMapped::kNo,
+                        SkBackingFit::kApprox, SkBudgeted::kNo,
+                        this->colorSpaceInfo().refColorSpace());
+                if (!sContext) {
                     callback(context, nullptr, 0);
                     return;
                 }
+                if (!sContext->copy(fRenderTargetProxy.get(), srcRect, {0, 0})) {
+                    callback(context, nullptr, 0);
+                    return;
+                }
+                texProxy = sk_ref_sp(sContext->asTextureProxy());
+                SkASSERT(texProxy);
                 srcRectToDraw = SkRect::MakeWH(srcRect.width(), srcRect.height());
             }
             rtc = direct->priv().makeDeferredRenderTargetContext(
@@ -2544,10 +2568,12 @@
     GrSurfaceDesc desc;
     bool rectsMustMatch = false;
     bool disallowSubrect = false;
-    if (!this->caps()->initDescForDstCopy(rtProxy, &desc, &rectsMustMatch,
+    GrSurfaceOrigin origin;
+    if (!this->caps()->initDescForDstCopy(rtProxy, &desc, &origin, &rectsMustMatch,
                                           &disallowSubrect)) {
         desc.fFlags = kRenderTarget_GrSurfaceFlag;
         desc.fConfig = rtProxy->config();
+        origin = rtProxy->origin();
     }
 
     if (!disallowSubrect) {
@@ -2556,55 +2582,36 @@
 
     SkIPoint dstPoint, dstOffset;
     SkBackingFit fit;
-    GrSurfaceProxy::RectsMustMatch matchRects;
     if (rectsMustMatch) {
         desc.fWidth = rtProxy->width();
         desc.fHeight = rtProxy->height();
         dstPoint = {copyRect.fLeft, copyRect.fTop};
         dstOffset = {0, 0};
         fit = SkBackingFit::kExact;
-        matchRects = GrSurfaceProxy::RectsMustMatch::kYes;
     } else {
         desc.fWidth = copyRect.width();
         desc.fHeight = copyRect.height();
         dstPoint = {0, 0};
         dstOffset = {copyRect.fLeft, copyRect.fTop};
         fit = SkBackingFit::kApprox;
-        matchRects = GrSurfaceProxy::RectsMustMatch::kNo;
     }
 
-    sk_sp<GrTextureProxy> newProxy = GrSurfaceProxy::Copy(fContext, rtProxy, GrMipMapped::kNo,
-                                                          copyRect, fit, SkBudgeted::kYes,
-                                                          matchRects);
+    SkASSERT(rtProxy->backendFormat().textureType() == GrTextureType::k2D);
+    const GrBackendFormat& format = rtProxy->backendFormat();
+    sk_sp<GrSurfaceContext> sContext = fContext->priv().makeDeferredSurfaceContext(
+            format, desc, origin, GrMipMapped::kNo, fit, SkBudgeted::kYes,
+            sk_ref_sp(this->colorSpaceInfo().colorSpace()));
+    if (!sContext) {
+        SkDebugf("setupDstTexture: surfaceContext creation failed.\n");
+        return false;
+    }
 
-    dstProxy->setProxy(std::move(newProxy));
+    if (!sContext->copy(rtProxy, copyRect, dstPoint)) {
+        SkDebugf("setupDstTexture: copy failed.\n");
+        return false;
+    }
+
+    dstProxy->setProxy(sContext->asTextureProxyRef());
     dstProxy->setOffset(dstOffset);
     return true;
 }
-
-bool GrRenderTargetContext::blitTexture(GrTextureProxy* src, const SkIRect& srcRect,
-                                        const SkIPoint& dstPoint) {
-    SkIRect clippedSrcRect;
-    SkIPoint clippedDstPoint;
-    if (!GrClipSrcRectAndDstPoint(this->asSurfaceProxy()->isize(), src->isize(), srcRect, dstPoint,
-                                  &clippedSrcRect, &clippedDstPoint)) {
-        return false;
-    }
-
-    GrPaint paint;
-    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
-    auto fp = GrSimpleTextureEffect::Make(sk_ref_sp(src->asTextureProxy()),
-                                          SkMatrix::I());
-    if (!fp) {
-        return false;
-    }
-    paint.addColorFragmentProcessor(std::move(fp));
-
-    this->fillRectToRect(
-            GrNoClip(), std::move(paint), GrAA::kNo, SkMatrix::I(),
-            SkRect::MakeXYWH(clippedDstPoint.fX, clippedDstPoint.fY, clippedSrcRect.width(),
-                             clippedSrcRect.height()),
-            SkRect::Make(clippedSrcRect));
-    return true;
-}
-
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index 5e68e8d..528a670 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -397,13 +397,6 @@
                           const SkRect& dst);
 
     /**
-     * Draws the src texture with no matrix. The dstRect is the dstPoint with the width and height
-     * of the srcRect. The srcRect and dstRect are clipped to the bounds of the src and dst surfaces
-     * respectively.
-     */
-    bool blitTexture(GrTextureProxy* src, const SkIRect& srcRect, const SkIPoint& dstPoint);
-
-    /**
      * Adds the necessary signal and wait semaphores and adds the passed in SkDrawable to the
      * command stream.
      */
diff --git a/src/gpu/GrSurfaceContext.cpp b/src/gpu/GrSurfaceContext.cpp
index bedb5c3..2b6987c 100644
--- a/src/gpu/GrSurfaceContext.cpp
+++ b/src/gpu/GrSurfaceContext.cpp
@@ -5,8 +5,6 @@
  * found in the LICENSE file.
  */
 
-#include "src/gpu/GrSurfaceContext.h"
-
 #include "include/private/GrAuditTrail.h"
 #include "include/private/GrOpList.h"
 #include "include/private/GrRecordingContext.h"
@@ -17,7 +15,7 @@
 #include "src/gpu/GrGpu.h"
 #include "src/gpu/GrRecordingContextPriv.h"
 #include "src/gpu/GrRenderTargetContext.h"
-#include "src/gpu/GrSurfaceContextPriv.h"
+#include "src/gpu/GrSurfaceContext.h"
 #include "src/gpu/GrSurfacePriv.h"
 #include "src/gpu/GrTextureContext.h"
 #include "src/gpu/SkGr.h"
@@ -373,7 +371,7 @@
         }
 
         auto tempProxy = direct->priv().proxyProvider()->createProxy(
-                format, desc, dstProxy->origin(), SkBackingFit::kApprox, SkBudgeted::kYes);
+                format, desc, kTopLeft_GrSurfaceOrigin, SkBackingFit::kApprox, SkBudgeted::kYes);
         if (!tempProxy) {
             return false;
         }
@@ -395,7 +393,7 @@
         }
 
         if (this->asRenderTargetContext()) {
-            std::unique_ptr<GrFragmentProcessor> fp;
+        std::unique_ptr<GrFragmentProcessor> fp;
             if (canvas2DFastPath) {
                 fp = direct->priv().createUPMToPMEffect(
                         GrSimpleTextureEffect::Make(std::move(tempProxy), SkMatrix::I()));
@@ -417,9 +415,10 @@
         } else {
             SkIRect srcRect = SkIRect::MakeWH(width, height);
             SkIPoint dstPoint = SkIPoint::Make(left, top);
-            if (this->copy(tempProxy.get(), srcRect, dstPoint)) {
+            if (!caps->canCopySurface(this->asSurfaceProxy(), tempProxy.get(), srcRect, dstPoint)) {
                 return false;
             }
+            SkAssertResult(this->copy(tempProxy.get(), srcRect, dstPoint));
         }
         return true;
     }
@@ -526,17 +525,13 @@
     ASSERT_SINGLE_OWNER
     RETURN_FALSE_IF_ABANDONED
     SkDEBUGCODE(this->validate();)
-    GR_AUDIT_TRAIL_AUTO_FRAME(this->auditTrail(), "GrSurfaceContextPriv::copy");
+    GR_AUDIT_TRAIL_AUTO_FRAME(this->auditTrail(), "GrSurfaceContext::copy");
 
-    SkASSERT(src->backendFormat().textureType() != GrTextureType::kExternal);
-    SkASSERT(src->origin() == this->asSurfaceProxy()->origin());
-
-    GrSurfaceProxy* dst = this->asSurfaceProxy();
-
-    if (!this->fContext->priv().caps()->canCopySurface(dst, src, srcRect, dstPoint)) {
+    if (!fContext->priv().caps()->canCopySurface(this->asSurfaceProxy(), src, srcRect,
+                                                        dstPoint)) {
         return false;
     }
 
-    return this->getOpList()->copySurface(this->fContext, dst, src, srcRect, dstPoint);
+    return this->getOpList()->copySurface(fContext, this->asSurfaceProxy(),
+                                          src, srcRect, dstPoint);
 }
-
diff --git a/src/gpu/GrSurfaceContext.h b/src/gpu/GrSurfaceContext.h
index e72364c..3cbb838 100644
--- a/src/gpu/GrSurfaceContext.h
+++ b/src/gpu/GrSurfaceContext.h
@@ -39,6 +39,26 @@
     int width() const { return this->asSurfaceProxy()->width(); }
     int height() const { return this->asSurfaceProxy()->height(); }
 
+    /*
+     * Copy 'src' into the proxy backing this context
+     * @param src       src of pixels
+     * @param srcRect   the subset of 'src' to copy
+     * @param dstPoint  the origin of the 'srcRect' in the destination coordinate space
+     * @return          true if the copy succeeded; false otherwise
+     *
+     * Note: Notionally, 'srcRect' is clipped to 'src's extent with 'dstPoint' being adjusted.
+     *       Then the 'srcRect' offset by 'dstPoint' is clipped against the dst's extent.
+     *       The end result is only valid src pixels and dst pixels will be touched but the copied
+     *       regions will not be shifted.
+     */
+    bool copy(GrSurfaceProxy* src, const SkIRect& srcRect, const SkIPoint& dstPoint);
+
+    bool copy(GrSurfaceProxy* src) {
+        return this->copy(src,
+                          SkIRect::MakeWH(src->width(), src->height()),
+                          SkIPoint::Make(0, 0));
+    }
+
    /**
     * These flags can be used with the read/write pixels functions below.
     */
@@ -114,17 +134,6 @@
     GrSurfaceContextPriv surfPriv();
     const GrSurfaceContextPriv surfPriv() const;
 
-#if GR_TEST_UTILS
-    bool testCopy(GrSurfaceProxy* src, const SkIRect& srcRect, const SkIPoint& dstPoint) {
-        return this->copy(src, srcRect, dstPoint);
-    }
-
-    bool testCopy(GrSurfaceProxy* src) {
-        return this->copy(src);
-    }
-#endif
-
-
 protected:
     friend class GrSurfaceContextPriv;
 
@@ -141,29 +150,6 @@
     GrRecordingContext* fContext;
 
 private:
-    friend class GrSurfaceProxy; // for copy
-
-    /**
-     * Copy 'src' into the proxy backing this context. This call will not do any draw fallback.
-     * Currently only writePixels and replaceRenderTarget call this directly. All other copies
-     * should go through GrSurfaceProxy::Copy.
-     * @param src       src of pixels
-     * @param srcRect   the subset of 'src' to copy
-     * @param dstPoint  the origin of the 'srcRect' in the destination coordinate space
-     * @return          true if the copy succeeded; false otherwise
-     *
-     * Note: Notionally, 'srcRect' is clipped to 'src's extent with 'dstPoint' being adjusted.
-     *       Then the 'srcRect' offset by 'dstPoint' is clipped against the dst's extent.
-     *       The end result is only valid src pixels and dst pixels will be touched but the copied
-     *       regions will not be shifted. The 'src' must have the same origin as the backing proxy
-     *       of fSurfaceContext.
-     */
-    bool copy(GrSurfaceProxy* src, const SkIRect& srcRect, const SkIPoint& dstPoint);
-
-    bool copy(GrSurfaceProxy* src) {
-        return this->copy(src, SkIRect::MakeWH(src->width(), src->height()), SkIPoint::Make(0, 0));
-    }
-
     bool writePixelsImpl(GrContext* direct, int left, int top, int width, int height,
                          GrColorType srcColorType, SkColorSpace* srcColorSpace,
                          const void* srcBuffer, size_t srcRowBytes, uint32_t pixelOpsFlags);
diff --git a/src/gpu/GrSurfaceProxy.cpp b/src/gpu/GrSurfaceProxy.cpp
index a699ec9..f459e02 100644
--- a/src/gpu/GrSurfaceProxy.cpp
+++ b/src/gpu/GrSurfaceProxy.cpp
@@ -12,13 +12,11 @@
 #include "include/private/GrOpList.h"
 #include "include/private/GrRecordingContext.h"
 #include "src/gpu/GrCaps.h"
-#include "src/gpu/GrClip.h"
 #include "src/gpu/GrContextPriv.h"
 #include "src/gpu/GrGpuResourcePriv.h"
 #include "src/gpu/GrProxyProvider.h"
 #include "src/gpu/GrRecordingContextPriv.h"
 #include "src/gpu/GrSurfaceContext.h"
-#include "src/gpu/GrSurfaceContextPriv.h"
 #include "src/gpu/GrSurfacePriv.h"
 #include "src/gpu/GrTexturePriv.h"
 #include "src/gpu/GrTextureRenderTargetProxy.h"
@@ -337,56 +335,33 @@
                                            GrMipMapped mipMapped,
                                            SkIRect srcRect,
                                            SkBackingFit fit,
-                                           SkBudgeted budgeted,
-                                           RectsMustMatch rectsMustMatch) {
+                                           SkBudgeted budgeted) {
     SkASSERT(LazyState::kFully != src->lazyInstantiationState());
-    GrSurfaceDesc dstDesc;
-    dstDesc.fConfig = src->config();
-
-    SkIPoint dstPoint;
-    if (rectsMustMatch == RectsMustMatch::kYes) {
-        dstDesc.fWidth = src->width();
-        dstDesc.fHeight = src->height();
-        dstPoint = {srcRect.fLeft, srcRect.fTop};
-    } else {
-        dstDesc.fWidth = srcRect.width();
-        dstDesc.fHeight = srcRect.height();
-        dstPoint = {0, 0};
-    }
-
     if (!srcRect.intersect(SkIRect::MakeWH(src->width(), src->height()))) {
         return nullptr;
     }
 
-    if (src->backendFormat().textureType() != GrTextureType::kExternal) {
-        sk_sp<GrSurfaceContext> dstContext(context->priv().makeDeferredSurfaceContext(
-                src->backendFormat().makeTexture2D(), dstDesc, src->origin(), mipMapped, fit,
-                budgeted));
-        if (!dstContext) {
-            return nullptr;
-        }
-        if (context->priv().caps()->canCopySurface(src, dstContext->asSurfaceProxy(), srcRect,
-                                                   dstPoint)) {
-            SkAssertResult(dstContext->copy(std::move(src), srcRect, dstPoint));
-            return dstContext->asTextureProxyRef();
-        }
-    }
-    if (src->asTextureProxy()) {
-        GrBackendFormat format = src->backendFormat().makeTexture2D();
-        if (!format.isValid()) {
-            return nullptr;
-        }
+    GrSurfaceDesc dstDesc;
+    dstDesc.fWidth = srcRect.width();
+    dstDesc.fHeight = srcRect.height();
+    dstDesc.fConfig = src->config();
 
-        sk_sp<GrRenderTargetContext> dstContext = context->priv().makeDeferredRenderTargetContext(
-                format, fit, dstDesc.fWidth, dstDesc.fHeight, dstDesc.fConfig, nullptr, 1,
-                mipMapped, src->origin(), nullptr, budgeted);
-
-        if (dstContext && dstContext->blitTexture(src->asTextureProxy(), srcRect, dstPoint)) {
-            return dstContext->asTextureProxyRef();
-        }
+    GrBackendFormat format = src->backendFormat().makeTexture2D();
+    if (!format.isValid()) {
+        return nullptr;
     }
-    // Can't use backend copies or draws.
-    return nullptr;
+
+    sk_sp<GrSurfaceContext> dstContext(context->priv().makeDeferredSurfaceContext(
+            format, dstDesc, src->origin(), mipMapped, fit, budgeted));
+    if (!dstContext) {
+        return nullptr;
+    }
+
+    if (!dstContext->copy(src, srcRect, SkIPoint::Make(0, 0))) {
+        return nullptr;
+    }
+
+    return dstContext->asTextureProxyRef();
 }
 
 sk_sp<GrTextureProxy> GrSurfaceProxy::Copy(GrRecordingContext* context, GrSurfaceProxy* src,
@@ -397,6 +372,29 @@
                 budgeted);
 }
 
+// Creates an exact-fit, budgeted, non-mipped surface context from 'dstDesc' and 'origin' and copies
+// 'srcProxy' into it. Returns nullptr if the format is invalid or context creation/copy fails.
+sk_sp<GrSurfaceContext> GrSurfaceProxy::TestCopy(GrRecordingContext* context,
+                                                 const GrSurfaceDesc& dstDesc,
+                                                 GrSurfaceOrigin origin, GrSurfaceProxy* srcProxy) {
+    SkASSERT(LazyState::kFully != srcProxy->lazyInstantiationState());
+
+    GrBackendFormat format = srcProxy->backendFormat().makeTexture2D();
+    if (!format.isValid()) {
+        return nullptr;
+    }
+
+    sk_sp<GrSurfaceContext> dstContext(context->priv().makeDeferredSurfaceContext(
+            format, dstDesc, origin, GrMipMapped::kNo, SkBackingFit::kExact, SkBudgeted::kYes));
+    if (!dstContext) {
+        return nullptr;
+    }
+    if (!dstContext->copy(srcProxy)) {
+        return nullptr;
+    }
+    return dstContext;
+}
+
 void GrSurfaceProxyPriv::exactify() {
     SkASSERT(GrSurfaceProxy::LazyState::kFully != fProxy->lazyInstantiationState());
     if (this->isExact()) {
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index a2b45fd..c10b696 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -254,11 +254,7 @@
         if (this->context()->abandoned()) {
             return;
         }
-
-        SkASSERT(fRenderTargetContext->asTextureProxy());
-        SkAssertResult(rtc->blitTexture(fRenderTargetContext->asTextureProxy(),
-                                        SkIRect::MakeWH(this->width(), this->height()),
-                                        SkIPoint::Make(0,0)));
+        rtc->copy(fRenderTargetContext->asSurfaceProxy());
     }
 
     fRenderTargetContext = std::move(rtc);
diff --git a/src/gpu/SkGr.cpp b/src/gpu/SkGr.cpp
index 27a1669..5f0e9dc 100644
--- a/src/gpu/SkGr.cpp
+++ b/src/gpu/SkGr.cpp
@@ -133,8 +133,31 @@
         return nullptr;
     }
 
-    return GrSurfaceProxy::Copy(ctx, baseProxy, GrMipMapped::kYes, SkBackingFit::kExact,
-                                SkBudgeted::kYes);
+    GrProxyProvider* proxyProvider = ctx->priv().proxyProvider();
+    GrSurfaceDesc desc;
+    desc.fFlags = kNone_GrSurfaceFlags;
+    desc.fWidth = baseProxy->width();
+    desc.fHeight = baseProxy->height();
+    desc.fConfig = baseProxy->config();
+    desc.fSampleCnt = 1;
+
+    GrBackendFormat format = baseProxy->backendFormat().makeTexture2D();
+    if (!format.isValid()) {
+        return nullptr;
+    }
+
+    sk_sp<GrTextureProxy> proxy =
+            proxyProvider->createMipMapProxy(format, desc, baseProxy->origin(), SkBudgeted::kYes);
+    if (!proxy) {
+        return nullptr;
+    }
+
+    // Copy the base layer to our proxy
+    sk_sp<GrSurfaceContext> sContext = ctx->priv().makeWrappedSurfaceContext(proxy);
+    SkASSERT(sContext);
+    SkAssertResult(sContext->copy(baseProxy));
+
+    return proxy;
 }
 
 sk_sp<GrTextureProxy> GrRefCachedBitmapTextureProxy(GrRecordingContext* ctx,
diff --git a/src/gpu/geometry/GrRect.h b/src/gpu/geometry/GrRect.h
index 2a3e72b..064d539 100644
--- a/src/gpu/geometry/GrRect.h
+++ b/src/gpu/geometry/GrRect.h
@@ -82,60 +82,4 @@
     SkMatrix rectTransform = SkMatrix::MakeRectToRect(inRect, outRect, SkMatrix::kFill_ScaleToFit);
     rectTransform.mapPoints(outPts, inPts, ptCount);
 }
-
-/**
- * Clips the srcRect and the dstPoint to the bounds of the srcSize and dstSize respectively. Returns
- * true if the srcRect and dstRect intersect the srcRect and dst rect (dstPoint with srcRect
- * width/height). Returns false otherwise. The clipped values are returned in clippedSrcRect and
- * clippedDstPoint.
- */
-static inline bool GrClipSrcRectAndDstPoint(const SkISize& dstSize,
-                                            const SkISize& srcSize,
-                                            const SkIRect& srcRect,
-                                            const SkIPoint& dstPoint,
-                                            SkIRect* clippedSrcRect,
-                                            SkIPoint* clippedDstPoint) {
-    *clippedSrcRect = srcRect;
-    *clippedDstPoint = dstPoint;
-
-    // clip the left edge to src and dst bounds, adjusting dstPoint if necessary
-    if (clippedSrcRect->fLeft < 0) {
-        clippedDstPoint->fX -= clippedSrcRect->fLeft;
-        clippedSrcRect->fLeft = 0;
-    }
-    if (clippedDstPoint->fX < 0) {
-        clippedSrcRect->fLeft -= clippedDstPoint->fX;
-        clippedDstPoint->fX = 0;
-    }
-
-    // clip the top edge to src and dst bounds, adjusting dstPoint if necessary
-    if (clippedSrcRect->fTop < 0) {
-        clippedDstPoint->fY -= clippedSrcRect->fTop;
-        clippedSrcRect->fTop = 0;
-    }
-    if (clippedDstPoint->fY < 0) {
-        clippedSrcRect->fTop -= clippedDstPoint->fY;
-        clippedDstPoint->fY = 0;
-    }
-
-    // clip the right edge to the src and dst bounds.
-    if (clippedSrcRect->fRight > srcSize.width()) {
-        clippedSrcRect->fRight = srcSize.width();
-    }
-    if (clippedDstPoint->fX + clippedSrcRect->width() > dstSize.width()) {
-        clippedSrcRect->fRight = clippedSrcRect->fLeft + dstSize.width() - clippedDstPoint->fX;
-    }
-
-    // clip the bottom edge to the src and dst bounds.
-    if (clippedSrcRect->fBottom > srcSize.height()) {
-        clippedSrcRect->fBottom = srcSize.height();
-    }
-    if (clippedDstPoint->fY + clippedSrcRect->height() > dstSize.height()) {
-        clippedSrcRect->fBottom = clippedSrcRect->fTop + dstSize.height() - clippedDstPoint->fY;
-    }
-
-    // The above clipping steps may have inverted the rect if it didn't intersect either the src or
-    // dst bounds.
-    return !clippedSrcRect->isEmpty();
-}
 #endif
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index 8251ec3..ae59460 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -2308,9 +2308,11 @@
 }
 
 bool GrGLCaps::canCopyTexSubImage(GrPixelConfig dstConfig, bool dstHasMSAARenderBuffer,
-                                  const GrTextureType* dstTypeIfTexture,
+                                  bool dstIsTextureable, bool dstIsGLTexture2D,
+                                  GrSurfaceOrigin dstOrigin,
                                   GrPixelConfig srcConfig, bool srcHasMSAARenderBuffer,
-                                  const GrTextureType* srcTypeIfTexture) const {
+                                  bool srcIsTextureable, bool srcIsGLTexture2D,
+                                  GrSurfaceOrigin srcOrigin) const {
     // Table 3.9 of the ES2 spec indicates the supported formats with CopyTexSubImage
     // and BGRA isn't in the spec. There doesn't appear to be any extension that adds it. Perhaps
     // many drivers would allow it to work, but ANGLE does not.
@@ -2326,15 +2328,16 @@
 
     // CopyTex(Sub)Image writes to a texture and we have no way of dynamically wrapping a RT in a
     // texture.
-    if (!dstTypeIfTexture) {
+    if (!dstIsTextureable) {
         return false;
     }
 
-    // Check that we could wrap the source in an FBO, that the dst is not TEXTURE_EXTERNAL, that no
-    // mirroring is required
+    // Check that we could wrap the source in an FBO, that the src (if textureable) and dst are
+    // TEXTURE_2D, and that no mirroring is required (i.e. the origins match).
     if (this->canConfigBeFBOColorAttachment(srcConfig) &&
-        (!srcTypeIfTexture || *srcTypeIfTexture != GrTextureType::kExternal) &&
-        *dstTypeIfTexture != GrTextureType::kExternal) {
+        (!srcIsTextureable || srcIsGLTexture2D) &&
+        dstIsGLTexture2D &&
+        dstOrigin == srcOrigin) {
         return true;
     } else {
         return false;
@@ -2342,10 +2345,11 @@
 }
 
 bool GrGLCaps::canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCnt,
-                             const GrTextureType* dstTypeIfTexture,
+                             bool dstIsTextureable, bool dstIsGLTexture2D,
+                             GrSurfaceOrigin dstOrigin,
                              GrPixelConfig srcConfig, int srcSampleCnt,
-                             const GrTextureType* srcTypeIfTexture,
-                             const SkRect& srcBounds, bool srcBoundsExact,
+                             bool srcIsTextureable, bool srcIsGLTexture2D,
+                             GrSurfaceOrigin srcOrigin, const SkRect& srcBounds,
                              const SkIRect& srcRect, const SkIPoint& dstPoint) const {
     auto blitFramebufferFlags = this->blitFramebufferSupportFlags();
     if (!this->canConfigBeFBOColorAttachment(dstConfig) ||
@@ -2353,23 +2357,30 @@
         return false;
     }
 
-    if (dstTypeIfTexture && *dstTypeIfTexture == GrTextureType::kExternal) {
+    if (dstIsTextureable && !dstIsGLTexture2D) {
         return false;
     }
-    if (srcTypeIfTexture && *srcTypeIfTexture == GrTextureType::kExternal) {
+    if (srcIsTextureable && !srcIsGLTexture2D) {
         return false;
     }
 
     if (GrGLCaps::kNoSupport_BlitFramebufferFlag & blitFramebufferFlags) {
         return false;
     }
+    if (GrGLCaps::kNoScalingOrMirroring_BlitFramebufferFlag & blitFramebufferFlags) {
+        // We would mirror to compensate for origin changes. Note that copySurface is
+        // specified such that the src and dst rects are the same.
+        if (dstOrigin != srcOrigin) {
+            return false;
+        }
+    }
 
     if (GrGLCaps::kResolveMustBeFull_BlitFrambufferFlag & blitFramebufferFlags) {
         if (srcSampleCnt > 1) {
             if (1 == dstSampleCnt) {
                 return false;
             }
-            if (SkRect::Make(srcRect) != srcBounds || !srcBoundsExact) {
+            if (SkRect::Make(srcRect) != srcBounds) {
                 return false;
             }
         }
@@ -2396,11 +2407,18 @@
             if (dstPoint.fX != srcRect.fLeft || dstPoint.fY != srcRect.fTop) {
                 return false;
             }
+            if (dstOrigin != srcOrigin) {
+                return false;
+            }
         }
     }
     return true;
 }
 
+bool GrGLCaps::canCopyAsDraw(GrPixelConfig dstConfig, bool srcIsTextureable) const {
+    return this->canConfigBeFBOColorAttachment(dstConfig) && srcIsTextureable;
+}
+
 static bool has_msaa_render_buffer(const GrSurfaceProxy* surf, const GrGLCaps& glCaps) {
     const GrRenderTargetProxy* rt = surf->asRenderTargetProxy();
     if (!rt) {
@@ -2417,6 +2435,9 @@
 
 bool GrGLCaps::onCanCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
                                 const SkIRect& srcRect, const SkIPoint& dstPoint) const {
+    GrSurfaceOrigin dstOrigin = dst->origin();
+    GrSurfaceOrigin srcOrigin = src->origin();
+
     GrPixelConfig dstConfig = dst->config();
     GrPixelConfig srcConfig = src->config();
 
@@ -2431,7 +2452,8 @@
     SkASSERT((dstSampleCnt > 0) == SkToBool(dst->asRenderTargetProxy()));
     SkASSERT((srcSampleCnt > 0) == SkToBool(src->asRenderTargetProxy()));
 
-    // None of our copy methods can handle a swizzle.
+    // None of our copy methods can handle a swizzle. TODO: Make copySurfaceAsDraw handle the
+    // swizzle.
     if (this->shaderCaps()->configOutputSwizzle(src->config()) !=
         this->shaderCaps()->configOutputSwizzle(dst->config())) {
         return false;
@@ -2440,28 +2462,37 @@
     const GrTextureProxy* dstTex = dst->asTextureProxy();
     const GrTextureProxy* srcTex = src->asTextureProxy();
 
-    GrTextureType dstTexType;
-    GrTextureType* dstTexTypePtr = nullptr;
-    GrTextureType srcTexType;
-    GrTextureType* srcTexTypePtr = nullptr;
-    if (dstTex) {
-        dstTexType = dstTex->textureType();
-        dstTexTypePtr = &dstTexType;
-    }
-    if (srcTex) {
-        srcTexType = srcTex->textureType();
-        srcTexTypePtr = &srcTexType;
-    }
+    bool dstIsTex2D = dstTex ? (dstTex->textureType() == GrTextureType::k2D) : false;
+    bool srcIsTex2D = srcTex ? (srcTex->textureType() == GrTextureType::k2D) : false;
 
-    return this->canCopyTexSubImage(dstConfig, has_msaa_render_buffer(dst, *this), dstTexTypePtr,
-                                    srcConfig, has_msaa_render_buffer(src, *this), srcTexTypePtr) ||
-           this->canCopyAsBlit(dstConfig, dstSampleCnt, dstTexTypePtr, srcConfig, srcSampleCnt,
-                               srcTexTypePtr, src->getBoundsRect(), src->priv().isExact(),
-                               srcRect, dstPoint);
+    // One of the possible requirements for copy as blit is that the srcRect must match the bounds
+    // of the src surface. If we have an approx-fit surface we can't know for sure what the src
+    // bounds will be at this time. Thus we assert that if we say we can copy as blit and the src is
+    // approx-fit, then we can also copy as draw. That way, when it comes time to do the copy, we
+    // know we will at least be able to do it as a draw.
+#ifdef SK_DEBUG
+    if (this->canCopyAsBlit(dstConfig, dstSampleCnt, SkToBool(dstTex),
+                            dstIsTex2D, dstOrigin, srcConfig, srcSampleCnt, SkToBool(srcTex),
+                            srcIsTex2D, srcOrigin, src->getBoundsRect(), srcRect, dstPoint) &&
+        !src->priv().isExact()) {
+        SkASSERT(this->canCopyAsDraw(dstConfig, SkToBool(srcTex)));
+    }
+#endif
+
+    return this->canCopyTexSubImage(dstConfig, has_msaa_render_buffer(dst, *this),
+                                    SkToBool(dstTex), dstIsTex2D, dstOrigin,
+                                    srcConfig, has_msaa_render_buffer(src, *this),
+                                    SkToBool(srcTex), srcIsTex2D, srcOrigin) ||
+           this->canCopyAsBlit(dstConfig, dstSampleCnt, SkToBool(dstTex),
+                               dstIsTex2D, dstOrigin, srcConfig, srcSampleCnt, SkToBool(srcTex),
+                               srcIsTex2D, srcOrigin, src->getBoundsRect(), srcRect,
+                               dstPoint) ||
+           this->canCopyAsDraw(dstConfig, SkToBool(srcTex));
 }
 
 bool GrGLCaps::initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
-                                  bool* rectsMustMatch, bool* disallowSubrect) const {
+                                  GrSurfaceOrigin* origin, bool* rectsMustMatch,
+                                  bool* disallowSubrect) const {
     // By default, we don't require rects to match.
     *rectsMustMatch = false;
 
@@ -2471,6 +2502,7 @@
     // If the src is a texture, we can implement the blit as a draw assuming the config is
     // renderable.
     if (src->asTextureProxy() && !this->isConfigRenderable(src->config())) {
+        *origin = kBottomLeft_GrSurfaceOrigin;
         desc->fFlags = kRenderTarget_GrSurfaceFlag;
         desc->fConfig = src->config();
         return true;
@@ -2491,6 +2523,7 @@
     // possible and we return false to fallback to creating a render target dst for render-to-
     // texture. This code prefers CopyTexSubImage to fbo blit and avoids triggering temporary fbo
     // creation. It isn't clear that avoiding temporary fbo creation is actually optimal.
+    GrSurfaceOrigin originForBlitFramebuffer = kTopLeft_GrSurfaceOrigin;
     bool rectsMustMatchForBlitFramebuffer = false;
     bool disallowSubrectForBlitFramebuffer = false;
     if (src->numColorSamples() > 1 &&
@@ -2498,9 +2531,14 @@
         rectsMustMatchForBlitFramebuffer = true;
         disallowSubrectForBlitFramebuffer = true;
         // Mirroring causes rects to mismatch later, don't allow it.
+        originForBlitFramebuffer = src->origin();
     } else if (src->numColorSamples() > 1 && (this->blitFramebufferSupportFlags() &
                                               kRectsMustMatchForMSAASrc_BlitFramebufferFlag)) {
         rectsMustMatchForBlitFramebuffer = true;
+        // Mirroring causes rects to mismatch later, don't allow it.
+        originForBlitFramebuffer = src->origin();
+    } else if (this->blitFramebufferSupportFlags() & kNoScalingOrMirroring_BlitFramebufferFlag) {
+        originForBlitFramebuffer = src->origin();
     }
 
     // Check for format issues with glCopyTexSubImage2D
@@ -2508,6 +2546,7 @@
         // glCopyTexSubImage2D doesn't work with this config. If the bgra can be used with fbo blit
         // then we set up for that, otherwise fail.
         if (this->canConfigBeFBOColorAttachment(kBGRA_8888_GrPixelConfig)) {
+            *origin = originForBlitFramebuffer;
             desc->fConfig = kBGRA_8888_GrPixelConfig;
             *rectsMustMatch = rectsMustMatchForBlitFramebuffer;
             *disallowSubrect = disallowSubrectForBlitFramebuffer;
@@ -2523,6 +2562,7 @@
             // It's illegal to call CopyTexSubImage2D on a MSAA renderbuffer. Set up for FBO
             // blit or fail.
             if (this->canConfigBeFBOColorAttachment(src->config())) {
+                *origin = originForBlitFramebuffer;
                 desc->fConfig = src->config();
                 *rectsMustMatch = rectsMustMatchForBlitFramebuffer;
                 *disallowSubrect = disallowSubrectForBlitFramebuffer;
@@ -2533,6 +2573,7 @@
     }
 
     // We'll do a CopyTexSubImage. Make the dst a plain old texture.
+    *origin = src->origin();
     desc->fConfig = src->config();
     desc->fFlags = kNone_GrSurfaceFlags;
     return true;
diff --git a/src/gpu/gl/GrGLCaps.h b/src/gpu/gl/GrGLCaps.h
index 77eed27..a2e2948 100644
--- a/src/gpu/gl/GrGLCaps.h
+++ b/src/gpu/gl/GrGLCaps.h
@@ -388,17 +388,21 @@
     }
 
     bool canCopyTexSubImage(GrPixelConfig dstConfig, bool dstHasMSAARenderBuffer,
-                            const GrTextureType* dstTypeIfTexture,
+                            bool dstIsTextureable, bool dstIsGLTexture2D,
+                            GrSurfaceOrigin dstOrigin,
                             GrPixelConfig srcConfig, bool srcHasMSAARenderBuffer,
-                            const GrTextureType* srcTypeIfTexture) const;
+                            bool srcIsTextureable, bool srcIsGLTexture2D,
+                            GrSurfaceOrigin srcOrigin) const;
     bool canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCnt,
-                       const GrTextureType* dstTypeIfTexture,
+                       bool dstIsTextureable, bool dstIsGLTexture2D,
+                       GrSurfaceOrigin dstOrigin,
                        GrPixelConfig srcConfig, int srcSampleCnt,
-                        const GrTextureType* srcTypeIfTexture,
-                       const SkRect& srcBounds, bool srcBoundsExact,
+                       bool srcIsTextureable, bool srcIsGLTexture2D,
+                       GrSurfaceOrigin srcOrigin, const SkRect& srcBounds,
                        const SkIRect& srcRect, const SkIPoint& dstPoint) const;
+    bool canCopyAsDraw(GrPixelConfig dstConfig, bool srcIsTextureable) const;
 
-    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
+    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc, GrSurfaceOrigin*,
                             bool* rectsMustMatch, bool* disallowSubrect) const override;
 
     bool programBinarySupport() const { return fProgramBinarySupport; }
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index d670a12..a899eb0 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -3180,11 +3180,12 @@
 }
 
 // Determines whether glBlitFramebuffer could be used between src and dst by onCopySurface.
-static inline bool can_blit_framebuffer_for_copy_surface(const GrSurface* dst,
-                                                         const GrSurface* src,
-                                                         const SkIRect& srcRect,
-                                                         const SkIPoint& dstPoint,
-                                                         const GrGLCaps& caps) {
+static inline bool can_blit_framebuffer_for_copy_surface(
+                                                const GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                                const GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                                const SkIRect& srcRect,
+                                                const SkIPoint& dstPoint,
+                                                const GrGLCaps& caps) {
     int dstSampleCnt = 0;
     int srcSampleCnt = 0;
     if (const GrRenderTarget* rt = dst->asRenderTarget()) {
@@ -3199,22 +3200,12 @@
     const GrGLTexture* dstTex = static_cast<const GrGLTexture*>(dst->asTexture());
     const GrGLTexture* srcTex = static_cast<const GrGLTexture*>(src->asTexture());
 
-    GrTextureType dstTexType;
-    GrTextureType* dstTexTypePtr = nullptr;
-    GrTextureType srcTexType;
-    GrTextureType* srcTexTypePtr = nullptr;
-    if (dstTex) {
-        dstTexType = dstTex->texturePriv().textureType();
-        dstTexTypePtr = &dstTexType;
-    }
-    if (srcTex) {
-        srcTexType = srcTex->texturePriv().textureType();
-        srcTexTypePtr = &srcTexType;
-    }
+    bool dstIsGLTexture2D = dstTex ? GR_GL_TEXTURE_2D == dstTex->target() : false;
+    bool srcIsGLTexture2D = srcTex ? GR_GL_TEXTURE_2D == srcTex->target() : false;
 
-    return caps.canCopyAsBlit(dst->config(), dstSampleCnt, dstTexTypePtr,
-                              src->config(), srcSampleCnt, srcTexTypePtr,
-                              src->getBoundsRect(), true, srcRect, dstPoint);
+    return caps.canCopyAsBlit(dst->config(), dstSampleCnt, SkToBool(dstTex), dstIsGLTexture2D,
+                              dstOrigin, src->config(), srcSampleCnt, SkToBool(srcTex),
+                              srcIsGLTexture2D, srcOrigin, src->getBoundsRect(), srcRect, dstPoint);
 }
 
 static bool rt_has_msaa_render_buffer(const GrGLRenderTarget* rt, const GrGLCaps& glCaps) {
@@ -3225,7 +3216,8 @@
     return rt->numColorSamples() > 1 && glCaps.usesMSAARenderBuffers() && rt->renderFBOID() != 0;
 }
 
-static inline bool can_copy_texsubimage(const GrSurface* dst, const GrSurface* src,
+static inline bool can_copy_texsubimage(const GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                        const GrSurface* src, GrSurfaceOrigin srcOrigin,
                                         const GrGLCaps& caps) {
 
     const GrGLRenderTarget* dstRT = static_cast<const GrGLRenderTarget*>(dst->asRenderTarget());
@@ -3236,21 +3228,13 @@
     bool dstHasMSAARenderBuffer = dstRT ? rt_has_msaa_render_buffer(dstRT, caps) : false;
     bool srcHasMSAARenderBuffer = srcRT ? rt_has_msaa_render_buffer(srcRT, caps) : false;
 
-    GrTextureType dstTexType;
-    GrTextureType* dstTexTypePtr = nullptr;
-    GrTextureType srcTexType;
-    GrTextureType* srcTexTypePtr = nullptr;
-    if (dstTex) {
-        dstTexType = dstTex->texturePriv().textureType();
-        dstTexTypePtr = &dstTexType;
-    }
-    if (srcTex) {
-        srcTexType = srcTex->texturePriv().textureType();
-        srcTexTypePtr = &srcTexType;
-    }
+    bool dstIsGLTexture2D = dstTex ? GR_GL_TEXTURE_2D == dstTex->target() : false;
+    bool srcIsGLTexture2D = srcTex ? GR_GL_TEXTURE_2D == srcTex->target() : false;
 
-    return caps.canCopyTexSubImage(dst->config(), dstHasMSAARenderBuffer, dstTexTypePtr,
-                                   src->config(), srcHasMSAARenderBuffer, srcTexTypePtr);
+    return caps.canCopyTexSubImage(dst->config(), dstHasMSAARenderBuffer, SkToBool(dstTex),
+                                   dstIsGLTexture2D, dstOrigin, src->config(),
+                                   srcHasMSAARenderBuffer, SkToBool(srcTex), srcIsGLTexture2D,
+                                   srcOrigin);
 }
 
 // If a temporary FBO was created, its non-zero ID is returned.
@@ -3341,26 +3325,162 @@
     }
 }
 
-bool GrGLGpu::onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                            const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) {
-    // None of our copy methods can handle a swizzle.
+bool GrGLGpu::onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                            GrSurface* src, GrSurfaceOrigin srcOrigin,
+                            const SkIRect& srcRect, const SkIPoint& dstPoint,
+                            bool canDiscardOutsideDstRect) {
+    // None of our copy methods can handle a swizzle. TODO: Make copySurfaceAsDraw handle the
+    // swizzle.
     if (this->caps()->shaderCaps()->configOutputSwizzle(src->config()) !=
         this->caps()->shaderCaps()->configOutputSwizzle(dst->config())) {
         return false;
     }
+    // Try the draw path first only when dst is a render target, i.e. it already has an FBO object.
+    // This implicitly handles this->glCaps().useDrawInsteadOfAllRenderTargetWrites().
+    bool preferCopy = SkToBool(dst->asRenderTarget());
+    if (preferCopy && this->glCaps().canCopyAsDraw(dst->config(), SkToBool(src->asTexture()))) {
+        if (this->copySurfaceAsDraw(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint)) {
+            return true;
+        }
+    }
 
-    if (can_copy_texsubimage(dst, src, this->glCaps())) {
-        this->copySurfaceAsCopyTexSubImage(dst, src, srcRect, dstPoint);
+    if (can_copy_texsubimage(dst, dstOrigin, src, srcOrigin, this->glCaps())) {
+        this->copySurfaceAsCopyTexSubImage(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint);
         return true;
     }
 
-    if (can_blit_framebuffer_for_copy_surface(dst, src, srcRect, dstPoint, this->glCaps())) {
-        return this->copySurfaceAsBlitFramebuffer(dst, src, srcRect, dstPoint);
+    if (can_blit_framebuffer_for_copy_surface(dst, dstOrigin, src, srcOrigin,
+                                              srcRect, dstPoint, this->glCaps())) {
+        return this->copySurfaceAsBlitFramebuffer(dst, dstOrigin, src, srcOrigin,
+                                                  srcRect, dstPoint);
+    }
+
+    if (!preferCopy && this->glCaps().canCopyAsDraw(dst->config(), SkToBool(src->asTexture()))) {
+        if (this->copySurfaceAsDraw(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint)) {
+            return true;
+        }
     }
 
     return false;
 }
 
+// Creates and links the copy program in srcTex's fCopyPrograms slot, plus the shared quad vertex
+// buffer if it doesn't exist yet. Returns false if the buffer or GL program can't be created.
+bool GrGLGpu::createCopyProgram(GrTexture* srcTex) {
+    TRACE_EVENT0("skia", TRACE_FUNC);
+    int progIdx = TextureToCopyProgramIdx(srcTex);
+    const GrShaderCaps* shaderCaps = this->caps()->shaderCaps();
+    GrSLType samplerType =
+            GrSLCombinedSamplerTypeForTextureType(srcTex->texturePriv().textureType());
+    // Corners of the unit square; this vertex buffer is created once and reused by copy programs.
+    if (!fCopyProgramArrayBuffer) {
+        static const GrGLfloat vdata[] = {
+            0, 0,
+            0, 1,
+            1, 0,
+            1, 1
+        };
+        fCopyProgramArrayBuffer = GrGLBuffer::Make(this, sizeof(vdata), GrGpuBufferType::kVertex,
+                                                   kStatic_GrAccessPattern, vdata);
+    }
+    if (!fCopyProgramArrayBuffer) {
+        return false;
+    }
+
+    SkASSERT(!fCopyPrograms[progIdx].fProgram);
+    GL_CALL_RET(fCopyPrograms[progIdx].fProgram, CreateProgram());
+    if (!fCopyPrograms[progIdx].fProgram) {
+        return false;
+    }
+
+    const char* version = shaderCaps->versionDeclString();
+    GrShaderVar aVertex("a_vertex", kHalf2_GrSLType, GrShaderVar::kIn_TypeModifier);
+    GrShaderVar uTexCoordXform("u_texCoordXform", kHalf4_GrSLType,
+                               GrShaderVar::kUniform_TypeModifier);
+    GrShaderVar uPosXform("u_posXform", kHalf4_GrSLType, GrShaderVar::kUniform_TypeModifier);
+    GrShaderVar uTexture("u_texture", samplerType, GrShaderVar::kUniform_TypeModifier);
+    GrShaderVar vTexCoord("v_texCoord", kHalf2_GrSLType, GrShaderVar::kOut_TypeModifier);
+    GrShaderVar oFragColor("o_FragColor", kHalf4_GrSLType, GrShaderVar::kOut_TypeModifier);
+
+    SkString vshaderTxt(version);
+    if (shaderCaps->noperspectiveInterpolationSupport()) {
+        if (const char* extension = shaderCaps->noperspectiveInterpolationExtensionString()) {
+            vshaderTxt.appendf("#extension %s : require\n", extension);
+        }
+        vTexCoord.addModifier("noperspective");
+    }
+
+    aVertex.appendDecl(shaderCaps, &vshaderTxt);
+    vshaderTxt.append(";");
+    uTexCoordXform.appendDecl(shaderCaps, &vshaderTxt);
+    vshaderTxt.append(";");
+    uPosXform.appendDecl(shaderCaps, &vshaderTxt);
+    vshaderTxt.append(";");
+    vTexCoord.appendDecl(shaderCaps, &vshaderTxt);
+    vshaderTxt.append(";");
+
+    vshaderTxt.append(
+        "// Copy Program VS\n"
+        "void main() {"
+        "  v_texCoord = half2(a_vertex.xy * u_texCoordXform.xy + u_texCoordXform.zw);"
+        "  sk_Position.xy = a_vertex * u_posXform.xy + u_posXform.zw;"
+        "  sk_Position.zw = half2(0, 1);"
+        "}"
+    );
+
+    SkString fshaderTxt(version);
+    if (shaderCaps->noperspectiveInterpolationSupport()) {
+        if (const char* extension = shaderCaps->noperspectiveInterpolationExtensionString()) {
+            fshaderTxt.appendf("#extension %s : require\n", extension);
+        }
+    }
+    vTexCoord.setTypeModifier(GrShaderVar::kIn_TypeModifier);
+    vTexCoord.appendDecl(shaderCaps, &fshaderTxt);
+    fshaderTxt.append(";");
+    uTexture.appendDecl(shaderCaps, &fshaderTxt);
+    fshaderTxt.append(";");
+    fshaderTxt.append(
+        "// Copy Program FS\n"
+        "void main() {"
+        "  sk_FragColor = texture(u_texture, v_texCoord);"
+        "}"
+    );
+    // Convert each stage's SkSL to GLSL, then compile and attach it. Failures are not checked.
+    auto errorHandler = this->getContext()->priv().getShaderErrorHandler();
+    SkSL::String sksl(vshaderTxt.c_str(), vshaderTxt.size());
+    SkSL::Program::Settings settings;
+    settings.fCaps = shaderCaps;
+    SkSL::String glsl;
+    std::unique_ptr<SkSL::Program> program = GrSkSLtoGLSL(*fGLContext, SkSL::Program::kVertex_Kind,
+                                                          sksl, settings, &glsl, errorHandler);
+    GrGLuint vshader = GrGLCompileAndAttachShader(*fGLContext, fCopyPrograms[progIdx].fProgram,
+                                                  GR_GL_VERTEX_SHADER, glsl, &fStats, errorHandler);
+    SkASSERT(program->fInputs.isEmpty());
+
+    sksl.assign(fshaderTxt.c_str(), fshaderTxt.size());
+    program = GrSkSLtoGLSL(*fGLContext, SkSL::Program::kFragment_Kind, sksl, settings, &glsl,
+                           errorHandler);
+    GrGLuint fshader = GrGLCompileAndAttachShader(*fGLContext, fCopyPrograms[progIdx].fProgram,
+                                                  GR_GL_FRAGMENT_SHADER, glsl, &fStats,
+                                                  errorHandler);
+    SkASSERT(program->fInputs.isEmpty());
+
+    // Attribute bindings only take effect at link time, so bind a_vertex to slot 0 before linking.
+    GL_CALL(BindAttribLocation(fCopyPrograms[progIdx].fProgram, 0, "a_vertex"));
+    GL_CALL(LinkProgram(fCopyPrograms[progIdx].fProgram));
+
+    GL_CALL_RET(fCopyPrograms[progIdx].fTextureUniform,
+                GetUniformLocation(fCopyPrograms[progIdx].fProgram, "u_texture"));
+    GL_CALL_RET(fCopyPrograms[progIdx].fPosXformUniform,
+                GetUniformLocation(fCopyPrograms[progIdx].fProgram, "u_posXform"));
+    GL_CALL_RET(fCopyPrograms[progIdx].fTexCoordXformUniform,
+                GetUniformLocation(fCopyPrograms[progIdx].fProgram, "u_texCoordXform"));
+
+    GL_CALL(DeleteShader(vshader));
+    GL_CALL(DeleteShader(fshader));
+    return true;
+}
+
 bool GrGLGpu::createMipmapProgram(int progIdx) {
     const bool oddWidth = SkToBool(progIdx & 0x2);
     const bool oddHeight = SkToBool(progIdx & 0x1);
@@ -3514,30 +3634,134 @@
     return true;
 }
 
-void GrGLGpu::copySurfaceAsCopyTexSubImage(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
+bool GrGLGpu::copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                const SkIRect& srcRect,
+                                const SkIPoint& dstPoint) {  // copy done as a textured draw
+    GrGLTexture* srcTex = static_cast<GrGLTexture*>(src->asTexture());
+    int progIdx = TextureToCopyProgramIdx(srcTex);
+    // dst is bound as the draw's framebuffer below, so its config must be FBO-attachable.
+    if (!this->glCaps().canConfigBeFBOColorAttachment(dst->config())) {
+        return false;
+    }
+    // Copy programs are created lazily, the first time a given program index is needed.
+    if (!fCopyPrograms[progIdx].fProgram) {
+        if (!this->createCopyProgram(srcTex)) {
+            SkDebugf("Failed to create copy program.\n");
+            return false;
+        }
+    }
+
+    int w = srcRect.width();
+    int h = srcRect.height();
+    // src is sampled from texture unit 0 with the ClampNearest sampler state.
+    this->bindTexture(0, GrSamplerState::ClampNearest(), srcTex);
+    // Render into dst, with the viewport covering the whole dst surface.
+    this->bindSurfaceFBOForPixelOps(dst, GR_GL_FRAMEBUFFER, kDst_TempFBOTarget);
+    this->flushViewport(dst->width(), dst->height());
+    fHWBoundRenderTargetUniqueID.makeInvalid();  // the bound FBO was changed above
+    // Area written in dst; reported to didWriteToSurface() together with dstOrigin.
+    SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY, w, h);
+
+    this->flushProgram(fCopyPrograms[progIdx].fProgram);
+
+    fHWVertexArrayState.setVertexArrayID(this, 0);
+    // The copy program's vertex buffer is fed as one float2 attribute at index 0.
+    GrGLAttribArrayState* attribs = fHWVertexArrayState.bindInternalVertexArray(this);
+    attribs->enableVertexArrays(this, 1);
+    attribs->set(this, 0, fCopyProgramArrayBuffer.get(), kFloat2_GrVertexAttribType,
+                 kFloat2_GrSLType, 2 * sizeof(GrGLfloat), 0);
+
+    // dst rect edges in NDC (-1 to 1)
+    int dw = dst->width();
+    int dh = dst->height();
+    GrGLfloat dx0 = 2.f * dstPoint.fX / dw - 1.f;
+    GrGLfloat dx1 = 2.f * (dstPoint.fX + w) / dw - 1.f;
+    GrGLfloat dy0 = 2.f * dstPoint.fY / dh - 1.f;
+    GrGLfloat dy1 = 2.f * (dstPoint.fY + h) / dh - 1.f;
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {  // mirror y for a bottom-left dst
+        dy0 = -dy0;
+        dy1 = -dy1;
+    }
+    // src rect edges in texels, measured from the other edge when src is bottom-left.
+    GrGLfloat sx0 = (GrGLfloat)srcRect.fLeft;
+    GrGLfloat sx1 = (GrGLfloat)(srcRect.fLeft + w);
+    GrGLfloat sy0 = (GrGLfloat)srcRect.fTop;
+    GrGLfloat sy1 = (GrGLfloat)(srcRect.fTop + h);
+    int sw = src->width();
+    int sh = src->height();
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        sy0 = sh - sy0;
+        sy1 = sh - sy1;
+    }
+    if (srcTex->texturePriv().textureType() != GrTextureType::kRectangle) {
+        // src rect edges in normalized texture space (0 to 1); rectangle textures stay in texels
+        sx0 /= sw;
+        sx1 /= sw;
+        sy0 /= sh;
+        sy1 /= sh;
+    }
+    // Each xform uniform is (width, height, x0, y0) of the corresponding rect.
+    GL_CALL(Uniform4f(fCopyPrograms[progIdx].fPosXformUniform, dx1 - dx0, dy1 - dy0, dx0, dy0));
+    GL_CALL(Uniform4f(fCopyPrograms[progIdx].fTexCoordXformUniform,
+                      sx1 - sx0, sy1 - sy0, sx0, sy0));
+    GL_CALL(Uniform1i(fCopyPrograms[progIdx].fTextureUniform, 0));
+    // Known state for the draw: reset blend info, color writes on, no scissor/window rects/stencil.
+    GrXferProcessor::BlendInfo blendInfo;
+    blendInfo.reset();
+    this->flushBlend(blendInfo, GrSwizzle::RGBA());
+    this->flushColorWrite(true);
+    this->flushHWAAState(nullptr, false);
+    this->disableScissor();
+    this->disableWindowRectangles();
+    this->disableStencil();
+    if (this->glCaps().srgbWriteControl()) {
+        this->flushFramebufferSRGB(true);
+    }
+    // Four vertices drawn as a triangle strip.
+    GL_CALL(DrawArrays(GR_GL_TRIANGLE_STRIP, 0, 4));
+    this->unbindTextureFBOForPixelOps(GR_GL_FRAMEBUFFER, dst);
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);
+
+    return true;
+}
+
+void GrGLGpu::copySurfaceAsCopyTexSubImage(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                           const SkIRect& srcRect,  // copy via CopyTexSubImage2D
                                            const SkIPoint& dstPoint) {
-    SkASSERT(can_copy_texsubimage(dst, src, this->glCaps()));
+    SkASSERT(can_copy_texsubimage(dst, dstOrigin, src, srcOrigin, this->glCaps()));
     this->bindSurfaceFBOForPixelOps(src, GR_GL_FRAMEBUFFER, kSrc_TempFBOTarget);
     GrGLTexture* dstTex = static_cast<GrGLTexture *>(dst->asTexture());
     SkASSERT(dstTex);
     // We modified the bound FBO
     fHWBoundRenderTargetUniqueID.makeInvalid();
+    GrGLIRect srcGLRect;  // srcRect re-expressed for GL using src's height and origin
+    srcGLRect.setRelativeTo(src->height(), srcRect, srcOrigin);
 
     this->bindTextureToScratchUnit(dstTex->target(), dstTex->textureID());
+    GrGLint dstY;  // y offset of the copy inside the dst texture
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {  // measure from the opposite edge
+        dstY = dst->height() - (dstPoint.fY + srcGLRect.fHeight);
+    } else {
+        dstY = dstPoint.fY;
+    }
     GL_CALL(CopyTexSubImage2D(dstTex->target(), 0,
-                              dstPoint.fX, dstPoint.fY,
-                              srcRect.fLeft, srcRect.fTop,
-                              srcRect.width(), srcRect.height()));
+                              dstPoint.fX, dstY,
+                              srcGLRect.fLeft, srcGLRect.fBottom,
+                              srcGLRect.fWidth, srcGLRect.fHeight));
     this->unbindTextureFBOForPixelOps(GR_GL_FRAMEBUFFER, src);
     SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY,
                                         srcRect.width(), srcRect.height());
-    // The rect is already in device space so we pass in kTopLeft so no flip is done.
-    this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);  // dstRect is relative to dstOrigin
 }
 
-bool GrGLGpu::copySurfaceAsBlitFramebuffer(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
+bool GrGLGpu::copySurfaceAsBlitFramebuffer(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                           const SkIRect& srcRect,
                                            const SkIPoint& dstPoint) {
-    SkASSERT(can_blit_framebuffer_for_copy_surface(dst, src, srcRect, dstPoint, this->glCaps()));
+    SkASSERT(can_blit_framebuffer_for_copy_surface(dst, dstOrigin, src, srcOrigin,
+                                                   srcRect, dstPoint, this->glCaps()));
     SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY,
                                         srcRect.width(), srcRect.height());
     if (dst == src) {
@@ -3550,25 +3774,37 @@
     this->bindSurfaceFBOForPixelOps(src, GR_GL_READ_FRAMEBUFFER, kSrc_TempFBOTarget);
     // We modified the bound FBO
     fHWBoundRenderTargetUniqueID.makeInvalid();
+    GrGLIRect srcGLRect;
+    GrGLIRect dstGLRect;
+    srcGLRect.setRelativeTo(src->height(), srcRect, srcOrigin);
+    dstGLRect.setRelativeTo(dst->height(), dstRect, dstOrigin);
 
     // BlitFrameBuffer respects the scissor, so disable it.
     this->disableScissor();
     this->disableWindowRectangles();
 
-    GL_CALL(BlitFramebuffer(srcRect.fLeft,
-                            srcRect.fTop,
-                            srcRect.fRight,
-                            srcRect.fBottom,
-                            dstRect.fLeft,
-                            dstRect.fTop,
-                            dstRect.fRight,
-                            dstRect.fBottom,
+    GrGLint srcY0;
+    GrGLint srcY1;
+    // Does the blit need to y-mirror or not?
+    if (srcOrigin == dstOrigin) {
+        srcY0 = srcGLRect.fBottom;
+        srcY1 = srcGLRect.fBottom + srcGLRect.fHeight;
+    } else {
+        srcY0 = srcGLRect.fBottom + srcGLRect.fHeight;
+        srcY1 = srcGLRect.fBottom;
+    }
+    GL_CALL(BlitFramebuffer(srcGLRect.fLeft,
+                            srcY0,
+                            srcGLRect.fLeft + srcGLRect.fWidth,
+                            srcY1,
+                            dstGLRect.fLeft,
+                            dstGLRect.fBottom,
+                            dstGLRect.fLeft + dstGLRect.fWidth,
+                            dstGLRect.fBottom + dstGLRect.fHeight,
                             GR_GL_COLOR_BUFFER_BIT, GR_GL_NEAREST));
     this->unbindTextureFBOForPixelOps(GR_GL_DRAW_FRAMEBUFFER, dst);
     this->unbindTextureFBOForPixelOps(GR_GL_READ_FRAMEBUFFER, src);
-
-    // The rect is already in device space so we pass in kTopLeft so no flip is done.
-    this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);
     return true;
 }
 
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 6b19022..5a6c6d8 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -255,8 +255,10 @@
 
     bool onRegenerateMipMapLevels(GrTexture*) override;
 
-    bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                       const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override;
+    bool onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                       GrSurface* src, GrSurfaceOrigin srcOrigin,
+                       const SkIRect& srcRect, const SkIPoint& dstPoint,
+                       bool canDiscardOutsideDstRect) override;
 
     // binds texture unit in GL
     void setTextureUnit(int unitIdx);
@@ -299,12 +301,15 @@
 
     bool waitSync(GrGLsync, uint64_t timeout, bool flush);
 
-    bool copySurfaceAsDraw(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                           const SkIPoint& dstPoint);
-    void copySurfaceAsCopyTexSubImage(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                                      const SkIPoint& dstPoint);
-    bool copySurfaceAsBlitFramebuffer(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                                      const SkIPoint& dstPoint);
+    bool copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                           const SkIRect& srcRect, const SkIPoint& dstPoint);
+    void copySurfaceAsCopyTexSubImage(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                      GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                      const SkIRect& srcRect, const SkIPoint& dstPoint);
+    bool copySurfaceAsBlitFramebuffer(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                      GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                      const SkIRect& srcRect, const SkIPoint& dstPoint);
 
     static bool BlendCoeffReferencesConstant(GrBlendCoeff coeff);
 
@@ -425,6 +430,7 @@
     void onDumpJSON(SkJSONWriter*) const override;
 #endif
 
+    bool createCopyProgram(GrTexture* srcTexture);
     bool createMipmapProgram(int progIdx);
 
     std::unique_ptr<GrGLContext> fGLContext;
diff --git a/src/gpu/gl/GrGLGpuCommandBuffer.h b/src/gpu/gl/GrGLGpuCommandBuffer.h
index 0ec79c6..0f9370f 100644
--- a/src/gpu/gl/GrGLGpuCommandBuffer.h
+++ b/src/gpu/gl/GrGLGpuCommandBuffer.h
@@ -21,8 +21,9 @@
 public:
     GrGLGpuTextureCommandBuffer(GrGLGpu* gpu) : fGpu(gpu) {}
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override {
-        fGpu->copySurface(fTexture, src, srcRect, dstPoint);
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override {
+        fGpu->copySurface(fTexture, fOrigin, src, srcOrigin, srcRect, dstPoint);
     }
 
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
@@ -67,8 +68,9 @@
         state->doUpload(upload);
     }
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override {
-        fGpu->copySurface(fRenderTarget, src,srcRect, dstPoint);
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override {
+        fGpu->copySurface(fRenderTarget, fOrigin, src, srcOrigin, srcRect, dstPoint);
     }
 
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
diff --git a/src/gpu/mock/GrMockCaps.h b/src/gpu/mock/GrMockCaps.h
index c3806d1..b8059d3 100644
--- a/src/gpu/mock/GrMockCaps.h
+++ b/src/gpu/mock/GrMockCaps.h
@@ -70,7 +70,7 @@
 
     bool surfaceSupportsReadPixels(const GrSurface*) const override { return true; }
 
-    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
+    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc, GrSurfaceOrigin*,
                             bool* rectsMustMatch, bool* disallowSubrect) const override {
         return false;
     }
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index de3af71..cf49972 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -101,7 +101,8 @@
                               GrColorType, GrGpuBuffer* transferBuffer, size_t offset) override {
         return true;
     }
-    bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
+    bool onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin, GrSurface* src,
+                       GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
                        const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override {
         return true;
     }
diff --git a/src/gpu/mock/GrMockGpuCommandBuffer.h b/src/gpu/mock/GrMockGpuCommandBuffer.h
index 7532bdc..9e2905e 100644
--- a/src/gpu/mock/GrMockGpuCommandBuffer.h
+++ b/src/gpu/mock/GrMockGpuCommandBuffer.h
@@ -19,7 +19,8 @@
 
     ~GrMockGpuTextureCommandBuffer() override {}
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override {}
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override {}
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override {}
     void insertEventMarker(const char*) override {}
@@ -41,7 +42,8 @@
     void insertEventMarker(const char*) override {}
     void begin() override {}
     void end() override {}
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override {}
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override {}
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override {}
 
diff --git a/src/gpu/mtl/GrMtlCaps.h b/src/gpu/mtl/GrMtlCaps.h
index 2ee1d88..7939dc2 100644
--- a/src/gpu/mtl/GrMtlCaps.h
+++ b/src/gpu/mtl/GrMtlCaps.h
@@ -46,11 +46,18 @@
         return fPreferredStencilFormat;
     }
 
-    bool canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCount, GrPixelConfig srcConfig,
-                       int srcSampleCount, const SkIRect& srcRect, const SkIPoint& dstPoint,
+    bool canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCount, GrSurfaceOrigin dstOrigin,
+                       GrPixelConfig srcConfig, int srcSampleCount, GrSurfaceOrigin srcOrigin,
+                       const SkIRect& srcRect, const SkIPoint& dstPoint,
                        bool areDstSrcSameObj) const;
 
-    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
+    bool canCopyAsDraw(GrPixelConfig dstConfig, bool dstIsRenderable,
+                       GrPixelConfig srcConfig, bool srcIsTextureable) const;
+
+    bool canCopyAsDrawThenBlit(GrPixelConfig dstConfig, GrPixelConfig srcConfig,
+                               bool srcIsTextureable) const;
+
+    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc, GrSurfaceOrigin*,
                             bool* rectsMustMatch, bool* disallowSubrect) const override {
         return false;
     }
diff --git a/src/gpu/mtl/GrMtlCaps.mm b/src/gpu/mtl/GrMtlCaps.mm
index 7e630d2..04a87ae 100644
--- a/src/gpu/mtl/GrMtlCaps.mm
+++ b/src/gpu/mtl/GrMtlCaps.mm
@@ -119,7 +119,9 @@
 }
 
 bool GrMtlCaps::canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCount,
+                              GrSurfaceOrigin dstOrigin,
                               GrPixelConfig srcConfig, int srcSampleCount,
+                              GrSurfaceOrigin srcOrigin,
                               const SkIRect& srcRect, const SkIPoint& dstPoint,
                               bool areDstSrcSameObj) const {
     if (dstConfig != srcConfig) {
@@ -128,6 +130,9 @@
     if ((dstSampleCount > 1 || srcSampleCount > 1) && (dstSampleCount != srcSampleCount)) {
         return false;
     }
+    if (dstOrigin != srcOrigin) {
+        return false;
+    }
     if (areDstSrcSameObj) {
         SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.x(), dstPoint.y(),
                                             srcRect.width(), srcRect.height());
@@ -138,8 +143,38 @@
     return true;
 }
 
+bool GrMtlCaps::canCopyAsDraw(GrPixelConfig dstConfig, bool dstIsRenderable,
+                              GrPixelConfig srcConfig, bool srcIsTextureable) const {
+    // A copy-as-draw needs a textureable src and a renderable dst. The draw does not swizzle
+    // yet, so both configs must also share the same output swizzle.
+    // TODO: Make copySurfaceAsDraw handle the swizzle
+    const auto* caps = this->shaderCaps();
+    const bool swizzleMismatch =
+            caps->configOutputSwizzle(srcConfig) != caps->configOutputSwizzle(dstConfig);
+    if (swizzleMismatch) {
+        return false;
+    }
+    return dstIsRenderable && srcIsTextureable;
+}
+
+bool GrMtlCaps::canCopyAsDrawThenBlit(GrPixelConfig dstConfig, GrPixelConfig srcConfig,
+                                      bool srcIsTextureable) const {
+    // Requires matching output swizzles and a textureable src; dst renderability is not
+    // consulted here.
+    // TODO: Make copySurfaceAsDraw handle the swizzle
+    const auto* caps = this->shaderCaps();
+    if (caps->configOutputSwizzle(srcConfig) != caps->configOutputSwizzle(dstConfig)) {
+        return false;
+    }
+
+    return srcIsTextureable;
+}
+
 bool GrMtlCaps::onCanCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
                                  const SkIRect& srcRect, const SkIPoint& dstPoint) const {
+    GrSurfaceOrigin dstOrigin = dst->origin();
+    GrSurfaceOrigin srcOrigin = src->origin();
+
     int dstSampleCnt = 0;
     int srcSampleCnt = 0;
     if (const GrRenderTargetProxy* rtProxy = dst->asRenderTargetProxy()) {
@@ -151,8 +186,13 @@
     SkASSERT((dstSampleCnt > 0) == SkToBool(dst->asRenderTargetProxy()));
     SkASSERT((srcSampleCnt > 0) == SkToBool(src->asRenderTargetProxy()));
 
-    return this->canCopyAsBlit(dst->config(), dstSampleCnt, src->config(), srcSampleCnt, srcRect,
-                               dstPoint, dst == src);
+    return this->canCopyAsBlit(dst->config(), dstSampleCnt, dstOrigin,
+                               src->config(), srcSampleCnt, srcOrigin,
+                               srcRect, dstPoint, dst == src) ||
+           this->canCopyAsDraw(dst->config(), SkToBool(dst->asRenderTargetProxy()),
+                               src->config(), SkToBool(src->asTextureProxy())) ||
+           this->canCopyAsDrawThenBlit(dst->config(), src->config(),
+                                       SkToBool(src->asTextureProxy()));
 }
 
 void GrMtlCaps::initGrCaps(const id<MTLDevice> device) {
diff --git a/src/gpu/mtl/GrMtlCopyManager.h b/src/gpu/mtl/GrMtlCopyManager.h
new file mode 100644
index 0000000..68d1a22
--- /dev/null
+++ b/src/gpu/mtl/GrMtlCopyManager.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+*/
+
+#ifndef GrMtlCopyManager_DEFINED
+#define GrMtlCopyManager_DEFINED
+
+#include "include/gpu/GrTypes.h"
+
+#import <Metal/Metal.h>
+
+class GrMtlCopyPipelineState;
+class GrMtlGpu;
+class GrSurface;
+struct SkIPoint;
+struct SkIRect;
+// Performs surface-to-surface copies on the Metal backend by drawing a textured quad.
+class GrMtlCopyManager {
+public:
+    explicit GrMtlCopyManager(GrMtlGpu* gpu) : fGpu(gpu) {}
+    // Draws srcRect of src into dst at dstPoint. Returns false if the draw cannot be set up.
+    bool copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                           const SkIRect& srcRect, const SkIPoint& dstPoint,
+                           bool canDiscardOutsideDstRect);
+    // True when the given pipeline state was built for dstPixelFormat.
+    static bool IsCompatible(const GrMtlCopyPipelineState*, MTLPixelFormat dstPixelFormat);
+    // Drops every Metal object held by the manager.
+    void destroyResources();
+
+private:
+    enum BufferIndex {  // vertex-stage buffer slots used by the copy draw
+        kUniform_BufferIndex,
+        kAttribute_BufferIndex,
+    };
+    // Each helper fills in the member(s) named after it.
+    void createCopyProgramBuffer();
+    void createCopyProgramShaders();
+    void createCopyProgramVertexDescriptor();
+    // Creates the sampler state, then runs the three helpers above.
+    void createCopyProgram();
+
+    id<MTLSamplerState>  fSamplerState = nil;
+    id<MTLBuffer>        fVertexAttributeBuffer = nil;
+    id<MTLFunction>      fVertexFunction = nil;
+    id<MTLFunction>      fFragmentFunction = nil;
+    MTLVertexDescriptor* fVertexDescriptor = nil;
+
+    GrMtlGpu* fGpu;
+};
+
+#endif
diff --git a/src/gpu/mtl/GrMtlCopyManager.mm b/src/gpu/mtl/GrMtlCopyManager.mm
new file mode 100644
index 0000000..b79950f
--- /dev/null
+++ b/src/gpu/mtl/GrMtlCopyManager.mm
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/mtl/GrMtlCopyManager.h"
+
+#include "include/gpu/GrSurface.h"
+
+#include "src/gpu/mtl/GrMtlBuffer.h"
+#include "src/gpu/mtl/GrMtlCommandBuffer.h"
+#include "src/gpu/mtl/GrMtlCopyPipelineState.h"
+#include "src/gpu/mtl/GrMtlGpu.h"
+#include "src/gpu/mtl/GrMtlResourceProvider.h"
+#include "src/gpu/mtl/GrMtlUtil.h"
+
+#include "include/core/SkPoint.h"
+#include "include/core/SkRect.h"
+#include "src/core/SkTraceEvent.h"
+
+#import <simd/simd.h>
+
+#if !__has_feature(objc_arc)
+#error This file must be compiled with Arc. Use -fobjc-arc flag
+#endif
+
+void GrMtlCopyManager::createCopyProgramBuffer() {
+    // Per-vertex attribute data for copy as draw: the corners of the unit square in
+    // triangle-strip order. The copy vertex shader scales and offsets them per copy.
+    static const simd::float2 vdata[4] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
+    sk_sp<GrMtlBuffer> mtlBuffer = GrMtlBuffer::Make(fGpu, sizeof(vdata), GrGpuBufferType::kVertex,
+                                                     kStatic_GrAccessPattern, vdata);
+    // Guard against Make() returning null: the member then stays nil and
+    // copySurfaceAsDraw() reports failure through its resource check instead of crashing here.
+    if (mtlBuffer) {
+        fVertexAttributeBuffer = mtlBuffer->mtlBuffer();
+    }
+}
+
+void GrMtlCopyManager::createCopyProgramShaders() {
+    // Create shaders required by pipeline state: assemble the SkSL for the copy vertex and
+    // fragment stages, compile each into a Metal library and keep the two entry points.
+    const char* versionDecl = fGpu->caps()->shaderCaps()->versionDeclString();
+    SkString vsText(versionDecl);
+    vsText.appendf(
+        "#extension GL_ARB_separate_shader_objects : enable\n"
+        "#extension GL_ARB_shading_language_420pack : enable\n"
+        "layout(set = %d"/*kUniform_BufferIndex*/", binding = 0) uniform vertexUniformBuffer {"
+            "float4 uPosXform;"
+            "float4 uTexCoordXform;"
+        "};"
+        "layout(location = 0) in float2 inPosition;"
+        "layout(location = 1) out float2 vTexCoord;"
+
+        "// Copy Program VS\n"
+        "void main() {"
+            "vTexCoord = inPosition * uTexCoordXform.xy + uTexCoordXform.zw;"
+            "sk_Position.xy = inPosition * uPosXform.xy + uPosXform.zw;"
+            "sk_Position.zw = float2(0, 1);"
+        "}",
+        kUniform_BufferIndex
+    );
+
+    SkString fsText(versionDecl);
+    fsText.append(
+        "#extension GL_ARB_separate_shader_objects : enable\n"
+        "#extension GL_ARB_shading_language_420pack : enable\n"
+
+        "layout(set = 1, binding = 0) uniform sampler2D uTexture;"
+        "layout(location = 1) in float2 vTexCoord;"
+
+        "// Copy Program FS\n"
+        "void main() {"
+            "sk_FragColor = texture(uTexture, vTexCoord);"
+        "}"
+    );
+
+    // Both stages compile with default settings and are expected to report no inputs.
+    SkSL::Program::Settings settings;
+    SkSL::Program::Inputs inputs;
+    id<MTLLibrary> vsLibrary = GrCompileMtlShaderLibrary(fGpu, vsText.c_str(),
+                                                         SkSL::Program::kVertex_Kind,
+                                                         settings, &inputs);
+    SkASSERT(inputs.isEmpty());
+    SkASSERT(vsLibrary);
+
+    id<MTLLibrary> fsLibrary = GrCompileMtlShaderLibrary(fGpu, fsText.c_str(),
+                                                         SkSL::Program::kFragment_Kind,
+                                                         settings, &inputs);
+    SkASSERT(inputs.isEmpty());
+    SkASSERT(fsLibrary);
+
+    // Look up the entry points by name and cache them in the members that pipeline creation
+    // reads later.
+    fVertexFunction = [vsLibrary newFunctionWithName: @"vertexMain"];
+    fFragmentFunction = [fsLibrary newFunctionWithName: @"fragmentMain"];
+    SkASSERT(fVertexFunction);
+    SkASSERT(fFragmentFunction);
+}
+
+void GrMtlCopyManager::createCopyProgramVertexDescriptor() {
+    // Describes the one vertex attribute fed to the copy pipeline.
+    // Expected [[stage_in]] (vertex attribute) MSL format for copies:
+    //
+    // struct Input {
+    //     float2 inPosition [[attribute(0)]];
+    // };
+    MTLVertexDescriptor* desc = [MTLVertexDescriptor new];
+    MTLVertexAttributeDescriptor* position = desc.attributes[0];
+    position.bufferIndex = kAttribute_BufferIndex;
+    position.offset = 0;
+    position.format = MTLVertexFormatFloat2;
+    MTLVertexBufferLayoutDescriptor* layout = desc.layouts[kAttribute_BufferIndex];
+    layout.stride = sizeof(simd::float2);
+    layout.stepRate = 1;
+    layout.stepFunction = MTLVertexStepFunctionPerVertex;
+    fVertexDescriptor = desc;
+}
+
+void GrMtlCopyManager::createCopyProgram() {
+    TRACE_EVENT0("skia", TRACE_FUNC);
+    // Builds everything copySurfaceAsDraw() checks for before drawing. The sampler is made
+    // from a default-initialized MTLSamplerDescriptor.
+    fSamplerState = [fGpu->device() newSamplerStateWithDescriptor: [MTLSamplerDescriptor new]];
+    // Vertex buffer, shader functions and vertex descriptor, in that order.
+    this->createCopyProgramBuffer();
+    this->createCopyProgramShaders();
+    this->createCopyProgramVertexDescriptor();
+}
+
+bool GrMtlCopyManager::copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                         GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                         const SkIRect& srcRect, const SkIPoint& dstPoint,
+                                         bool canDiscardOutsideDstRect) {
+    // Copies srcRect of src to dstPoint of dst by drawing a textured quad into dst. Every
+    // failure return happens before the render pass is opened.
+    SkASSERT(fGpu->mtlCaps().canCopyAsDraw(dst->config(), SkToBool(dst->asRenderTarget()),
+                                           src->config(), SkToBool(src->asTexture())));
+
+    id<MTLTexture> dstTex = GrGetMTLTextureFromSurface(dst, false);
+    id<MTLTexture> srcTex = GrGetMTLTextureFromSurface(src, false);
+    SkASSERT(srcTex != dstTex);
+    // The sampler, vertex buffer, shader functions and vertex descriptor are built on first use.
+    if (fSamplerState == nil) {
+        SkASSERT(fVertexAttributeBuffer == nil);
+        SkASSERT(fVertexFunction == nil);
+        SkASSERT(fFragmentFunction == nil);
+        SkASSERT(fVertexDescriptor == nil);
+        this->createCopyProgram();
+    }
+    if (!(fSamplerState && fVertexAttributeBuffer && fVertexFunction &&
+          fFragmentFunction && fVertexDescriptor)) {
+        SkASSERT(false);
+        return false;
+    }
+    // Resolve the pipeline before opening the render pass: creation can fail, and bailing out
+    // after a pass with a DontCare load has begun could leave dst's contents undefined.
+    GrMtlCopyPipelineState* copyPipelineState =
+            fGpu->resourceProvider().findOrCreateCopyPipelineState(dstTex.pixelFormat,
+                                                                   fVertexFunction,
+                                                                   fFragmentFunction,
+                                                                   fVertexDescriptor);
+    if (!copyPipelineState) {
+        return false;
+    }
+
+    // UPDATE UNIFORM DESCRIPTOR SET
+    int w = srcRect.width();
+    int h = srcRect.height();
+    // dst rect edges in NDC (-1 to 1)
+    int dw = dstTex.width;
+    int dh = dstTex.height;
+    float dx0 = 2.f * dstPoint.fX / dw - 1.f;
+    float dx1 = 2.f * (dstPoint.fX + w) / dw - 1.f;
+    float dy0 = 2.f * dstPoint.fY / dh - 1.f;
+    float dy1 = 2.f * (dstPoint.fY + h) / dh - 1.f;
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {
+        dy0 = -dy0;
+        dy1 = -dy1;
+    }
+
+    float sx0 = (float)srcRect.fLeft;
+    float sx1 = (float)(srcRect.fLeft + w);
+    float sy0 = (float)srcRect.fTop;
+    float sy1 = (float)(srcRect.fTop + h);
+    int sh = srcTex.height;
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        sy0 = sh - sy0;
+        sy1 = sh - sy1;
+    }
+    // src rect edges in normalized texture space (0 to 1).
+    int sw = srcTex.width;
+    sx0 /= sw;
+    sx1 /= sw;
+    sy0 /= sh;
+    sy1 /= sh;
+    const simd::float4 vertexUniformBuffer[2] = {
+        {dx1 - dx0, dy1 - dy0, dx0, dy0}, // posXform
+        {sx1 - sx0, sy1 - sy0, sx0, sy0}, // texCoordXform
+    };
+    // Keep dst's existing contents unless the caller allows discarding them.
+    MTLRenderPassDescriptor* renderPassDesc = [MTLRenderPassDescriptor renderPassDescriptor];
+    renderPassDesc.colorAttachments[0].texture = dstTex;
+    renderPassDesc.colorAttachments[0].slice = 0;
+    renderPassDesc.colorAttachments[0].level = 0;
+    renderPassDesc.colorAttachments[0].loadAction = canDiscardOutsideDstRect ? MTLLoadActionDontCare
+                                                                             : MTLLoadActionLoad;
+    renderPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore;
+    // Record the draw: a four-vertex triangle strip that samples srcTex.
+    id<MTLRenderCommandEncoder> renderCmdEncoder =
+            fGpu->commandBuffer()->getRenderCommandEncoder(renderPassDesc, nullptr);
+    [renderCmdEncoder setRenderPipelineState: copyPipelineState->mtlCopyPipelineState()];
+    [renderCmdEncoder setVertexBuffer: fVertexAttributeBuffer
+                               offset: 0
+                              atIndex: kAttribute_BufferIndex];
+    [renderCmdEncoder setVertexBytes: vertexUniformBuffer
+                              length: sizeof(vertexUniformBuffer)
+                             atIndex: kUniform_BufferIndex];
+    [renderCmdEncoder setFragmentTexture: srcTex atIndex: 0];
+    [renderCmdEncoder setFragmentSamplerState: fSamplerState atIndex: 0];
+    [renderCmdEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip
+                         vertexStart: 0
+                         vertexCount: 4];
+    return true;
+}
+
+bool GrMtlCopyManager::IsCompatible(const GrMtlCopyPipelineState* pipelineState,
+                                    MTLPixelFormat dstPixelFormat) {
+    return dstPixelFormat == pipelineState->fPixelFormat;  // usable only for the same format
+}
+
+void GrMtlCopyManager::destroyResources() {
+    // Clear every held reference; a nil sampler makes copySurfaceAsDraw() rebuild them all.
+    fVertexDescriptor = nil;
+    fFragmentFunction = fVertexFunction = nil;
+    fVertexAttributeBuffer = nil;
+    fSamplerState = nil;
+}
diff --git a/src/gpu/mtl/GrMtlCopyPipelineState.h b/src/gpu/mtl/GrMtlCopyPipelineState.h
new file mode 100644
index 0000000..ce4cbb2
--- /dev/null
+++ b/src/gpu/mtl/GrMtlCopyPipelineState.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrMtlCopyPipelineState_DEFINED
+#define GrMtlCopyPipelineState_DEFINED
+
+#import <Metal/Metal.h>
+
+class GrMtlGpu;
+// Pairs a Metal render pipeline state with the color attachment format it was built for.
+class GrMtlCopyPipelineState {
+public:
+    // Builds a copy pipeline targeting dstPixelFormat; returns a null pointer on failure.
+    static GrMtlCopyPipelineState* CreateCopyPipelineState(GrMtlGpu* gpu,
+                                                           MTLPixelFormat dstPixelFormat,
+                                                           id<MTLFunction> vertexFunction,
+                                                           id<MTLFunction> fragmentFunction,
+                                                           MTLVertexDescriptor* vertexDescriptor);
+
+    id<MTLRenderPipelineState> mtlCopyPipelineState() const { return fPipelineState; }
+
+private:
+    GrMtlCopyPipelineState(id<MTLRenderPipelineState> pipelineState,
+                           MTLPixelFormat pixelFormat)
+            : fPipelineState(pipelineState), fPixelFormat(pixelFormat) {}
+
+    id<MTLRenderPipelineState> fPipelineState;
+    MTLPixelFormat fPixelFormat;
+    // GrMtlCopyManager::IsCompatible() compares fPixelFormat directly.
+    friend class GrMtlCopyManager;
+};
+
+#endif
diff --git a/src/gpu/mtl/GrMtlCopyPipelineState.mm b/src/gpu/mtl/GrMtlCopyPipelineState.mm
new file mode 100644
index 0000000..43db99d
--- /dev/null
+++ b/src/gpu/mtl/GrMtlCopyPipelineState.mm
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/mtl/GrMtlCopyPipelineState.h"
+#include "src/gpu/mtl/GrMtlGpu.h"
+
+#if !__has_feature(objc_arc)
+#error This file must be compiled with Arc. Use -fobjc-arc flag
+#endif
+
+GrMtlCopyPipelineState* GrMtlCopyPipelineState::CreateCopyPipelineState(
+        GrMtlGpu* gpu,
+        MTLPixelFormat dstPixelFormat,
+        id<MTLFunction> vertexFunction,
+        id<MTLFunction> fragmentFunction,
+        MTLVertexDescriptor* vertexDescriptor) {
+    // Builds the copy-as-draw pipeline for dstPixelFormat; logs and returns nullptr on failure.
+    MTLRenderPipelineDescriptor* pipelineDescriptor = [MTLRenderPipelineDescriptor new];
+    pipelineDescriptor.vertexFunction = vertexFunction;
+    pipelineDescriptor.fragmentFunction = fragmentFunction;
+    pipelineDescriptor.vertexDescriptor = vertexDescriptor;
+    pipelineDescriptor.colorAttachments[0].pixelFormat = dstPixelFormat;
+
+    NSError* error = nil;
+    id<MTLRenderPipelineState> pipelineState =
+            [gpu->device() newRenderPipelineStateWithDescriptor: pipelineDescriptor
+                                                          error: &error];
+    // Treat a nil result as failure even if no error was reported. UTF8String is used because an
+    // ASCII conversion returns NULL for messages containing non-ASCII characters.
+    if (error || !pipelineState) {
+        SkDebugf("Error creating pipeline: %s\n",
+                 error ? [[error localizedDescription] UTF8String] : "unknown error");
+        return nullptr;
+    }
+
+    return new GrMtlCopyPipelineState(pipelineState, dstPixelFormat);
+}
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 32c22fc..7288daf 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -14,6 +14,7 @@
 #include "src/gpu/GrSemaphore.h"
 
 #include "src/gpu/mtl/GrMtlCaps.h"
+#include "src/gpu/mtl/GrMtlCopyManager.h"
 #include "src/gpu/mtl/GrMtlResourceProvider.h"
 #include "src/gpu/mtl/GrMtlStencilAttachment.h"
 
@@ -71,11 +72,22 @@
     void testingOnly_flushGpuAndSync() override;
 #endif
 
-    bool copySurfaceAsBlit(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                           const SkIPoint& dstPoint);
+    bool copySurfaceAsBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                           const SkIRect& srcRect, const SkIPoint& dstPoint);
 
-    bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                       const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override;
+    // This function is needed when we want to copy between two surfaces with different origins and
+    // the destination surface is not a render target. We will first draw to a temporary render
+    // target to adjust for the different origins and then blit from there to the destination.
+    bool copySurfaceAsDrawThenBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                   GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                   const SkIRect& srcRect, const SkIPoint& dstPoint);
+
+    bool onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                       GrSurface* src, GrSurfaceOrigin srcOrigin,
+                       const SkIRect& srcRect,
+                       const SkIPoint& dstPoint,
+                       bool canDiscardOutsideDstRect) override;
 
     GrGpuRTCommandBuffer* getCommandBuffer(
                                     GrRenderTarget*, GrSurfaceOrigin, const SkRect& bounds,
@@ -210,6 +222,7 @@
 
     std::unique_ptr<SkSL::Compiler> fCompiler;
 
+    GrMtlCopyManager      fCopyManager;
     GrMtlResourceProvider fResourceProvider;
 
     bool fDisconnected;
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index 6794704..a215b0d 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -99,6 +99,7 @@
         , fQueue(queue)
         , fCmdBuffer(nullptr)
         , fCompiler(new SkSL::Compiler())
+        , fCopyManager(this)
         , fResourceProvider(this)
         , fDisconnected(false) {
     fMtlCaps.reset(new GrMtlCaps(options, fDevice, featureSet));
@@ -120,6 +121,8 @@
         delete fCmdBuffer;
         fCmdBuffer = nullptr;
 
+        // We don't need to distinguish between abandon and destroy for these subsystems
+        fCopyManager.destroyResources();
         fResourceProvider.destroyResources();
 
         fQueue = nil;
@@ -132,6 +135,8 @@
 void GrMtlGpu::destroyResources() {
     // Will implicitly delete the command buffer
     this->submitCommandBuffer(SyncQueue::kForce_SyncQueue);
+
+    fCopyManager.destroyResources();
     fResourceProvider.destroyResources();
 
     fQueue = nil;
@@ -859,33 +864,126 @@
     return 0;
 }
 
-bool GrMtlGpu::copySurfaceAsBlit(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                                 const SkIPoint& dstPoint) {
+bool GrMtlGpu::copySurfaceAsBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                 GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                 const SkIRect& srcRect, const SkIPoint& dstPoint) {
 #ifdef SK_DEBUG
     int dstSampleCnt = get_surface_sample_cnt(dst);
     int srcSampleCnt = get_surface_sample_cnt(src);
-    SkASSERT(this->mtlCaps().canCopyAsBlit(dst->config(), dstSampleCnt, src->config(), srcSampleCnt,
+    SkASSERT(this->mtlCaps().canCopyAsBlit(dst->config(), dstSampleCnt, dstOrigin,
+                                           src->config(), srcSampleCnt, srcOrigin,
                                            srcRect, dstPoint, dst == src));
 #endif
     id<MTLTexture> dstTex = GrGetMTLTextureFromSurface(dst, false);
     id<MTLTexture> srcTex = GrGetMTLTextureFromSurface(src, false);
 
+    // Flip rect if necessary
+    SkIRect srcMtlRect;
+    srcMtlRect.fLeft = srcRect.fLeft;
+    srcMtlRect.fRight = srcRect.fRight;
+    SkIRect dstRect;
+    dstRect.fLeft = dstPoint.fX;
+    dstRect.fRight = dstPoint.fX + srcRect.width();
+
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        srcMtlRect.fTop = srcTex.height - srcRect.fBottom;
+        srcMtlRect.fBottom = srcTex.height - srcRect.fTop;
+    } else {
+        srcMtlRect.fTop = srcRect.fTop;
+        srcMtlRect.fBottom = srcRect.fBottom;
+    }
+
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {
+        dstRect.fTop = dstTex.height - dstPoint.fY - srcMtlRect.height();
+    } else {
+        dstRect.fTop = dstPoint.fY;
+    }
+    dstRect.fBottom = dstRect.fTop + srcMtlRect.height();
+
     id<MTLBlitCommandEncoder> blitCmdEncoder = this->commandBuffer()->getBlitCommandEncoder();
     [blitCmdEncoder copyFromTexture: srcTex
                         sourceSlice: 0
                         sourceLevel: 0
-                       sourceOrigin: MTLOriginMake(srcRect.x(), srcRect.y(), 0)
-                         sourceSize: MTLSizeMake(srcRect.width(), srcRect.height(), 1)
+                       sourceOrigin: MTLOriginMake(srcMtlRect.x(), srcMtlRect.y(), 0)
+                         sourceSize: MTLSizeMake(srcMtlRect.width(), srcMtlRect.height(), 1)
                           toTexture: dstTex
                    destinationSlice: 0
                    destinationLevel: 0
-                  destinationOrigin: MTLOriginMake(dstPoint.fX, dstPoint.fY, 0)];
+                  destinationOrigin: MTLOriginMake(dstRect.x(), dstRect.y(), 0)];
 
     return true;
 }
 
-bool GrMtlGpu::onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                             const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) {
+bool GrMtlGpu::copySurfaceAsDrawThenBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                         GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                         const SkIRect& srcRect, const SkIPoint& dstPoint) {
+#ifdef SK_DEBUG
+    int dstSampleCnt = get_surface_sample_cnt(dst);
+    int srcSampleCnt = get_surface_sample_cnt(src);
+    SkASSERT(dstSampleCnt == 0); // dst shouldn't be a render target
+    SkASSERT(!this->mtlCaps().canCopyAsBlit(dst->config(), dstSampleCnt, dstOrigin,
+                                            src->config(), srcSampleCnt, srcOrigin,
+                                            srcRect, dstPoint, dst == src));
+    SkASSERT(!this->mtlCaps().canCopyAsDraw(dst->config(), SkToBool(dst->asRenderTarget()),
+                                            src->config(), SkToBool(src->asTexture())));
+    SkASSERT(this->mtlCaps().canCopyAsDrawThenBlit(dst->config(),src->config(),
+                                                   SkToBool(src->asTexture())));
+#endif
+    // Describe a temporary render target with dst's config, sized to just the copied region.
+    GrSurfaceDesc surfDesc;
+    surfDesc.fFlags = kRenderTarget_GrSurfaceFlag;
+    surfDesc.fWidth = srcRect.width();
+    surfDesc.fHeight = srcRect.height();
+    surfDesc.fConfig = dst->config();
+    surfDesc.fSampleCnt = 1;
+
+    // Base the temporary's Metal descriptor on dst's texture, overriding size, mips and usage.
+    id<MTLTexture> dstTex = GrGetMTLTextureFromSurface(dst, false);
+    MTLTextureDescriptor* textureDesc = GrGetMTLTextureDescriptor(dstTex);
+    textureDesc.width = srcRect.width();
+    textureDesc.height = srcRect.height();
+    textureDesc.mipmapLevelCount = 1;
+    textureDesc.usage |= MTLTextureUsageRenderTarget;
+
+    sk_sp<GrMtlTexture> transferTexture =
+            GrMtlTextureRenderTarget::CreateNewTextureRenderTarget(this,
+                                                                   SkBudgeted::kYes,
+                                                                   surfDesc,
+                                                                   textureDesc,
+                                                                   GrMipMapsStatus::kNotAllocated);
+    // The temporary may not have been created; bail out instead of dereferencing null below.
+    if (!transferTexture) {
+        return false;
+    }
+
+    GrSurfaceOrigin transferOrigin = dstOrigin;
+    SkASSERT(this->mtlCaps().canCopyAsDraw(transferTexture->config(),
+                                           SkToBool(transferTexture->asRenderTarget()),
+                                           src->config(),
+                                           SkToBool(src->asTexture())));
+    // TODO: Eventually we will need to handle resolves either in this function or make a separate
+    // copySurfaceAsResolveThenBlit().
+    if (!this->copySurface(transferTexture.get(), transferOrigin,
+                           src, srcOrigin,
+                           srcRect, SkIPoint::Make(0, 0))) {
+        return false;
+    }
+
+    SkIRect transferRect = SkIRect::MakeXYWH(0, 0, srcRect.width(), srcRect.height());
+    SkASSERT(this->mtlCaps().canCopyAsBlit(dst->config(), get_surface_sample_cnt(dst), dstOrigin,
+                                           transferTexture->config(),
+                                           get_surface_sample_cnt(transferTexture.get()),
+                                           transferOrigin, transferRect, dstPoint, false));
+    return this->copySurface(dst, dstOrigin,
+                             transferTexture.get(), transferOrigin,
+                             transferRect, dstPoint);
+}
+
+bool GrMtlGpu::onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                             GrSurface* src, GrSurfaceOrigin srcOrigin,
+                             const SkIRect& srcRect,
+                             const SkIPoint& dstPoint,
+                             bool canDiscardOutsideDstRect) {
 
     GrPixelConfig dstConfig = dst->config();
     GrPixelConfig srcConfig = src->config();
@@ -899,15 +997,23 @@
     }
 
     bool success = false;
-    if (this->mtlCaps().canCopyAsBlit(dstConfig, dstSampleCnt, srcConfig, srcSampleCnt, srcRect,
-                                      dstPoint, dst == src)) {
-        success = this->copySurfaceAsBlit(dst, src, srcRect, dstPoint);
+    if (this->mtlCaps().canCopyAsDraw(dst->config(), SkToBool(dst->asRenderTarget()),
+                                      src->config(), SkToBool(src->asTexture()))) {
+        success = fCopyManager.copySurfaceAsDraw(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint,
+                                                 canDiscardOutsideDstRect);
+    } else if (this->mtlCaps().canCopyAsBlit(dstConfig, dstSampleCnt, dstOrigin,
+                                             srcConfig, srcSampleCnt, srcOrigin,
+                                             srcRect, dstPoint, dst == src)) {
+        success = this->copySurfaceAsBlit(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint);
+    } else if (this->mtlCaps().canCopyAsDrawThenBlit(dst->config(), src->config(),
+                                                     SkToBool(src->asTexture()))) {
+        success = this->copySurfaceAsDrawThenBlit(dst, dstOrigin, src, srcOrigin,
+                                                  srcRect, dstPoint);
     }
     if (success) {
         SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.x(), dstPoint.y(),
                                             srcRect.width(), srcRect.height());
-        // The rect is already in device space so we pass in kTopLeft so no flip is done.
-        this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+        this->didWriteToSurface(dst, dstOrigin, &dstRect);
     }
     return success;
 }
diff --git a/src/gpu/mtl/GrMtlGpuCommandBuffer.h b/src/gpu/mtl/GrMtlGpuCommandBuffer.h
index 062b228..f7a0eb9 100644
--- a/src/gpu/mtl/GrMtlGpuCommandBuffer.h
+++ b/src/gpu/mtl/GrMtlGpuCommandBuffer.h
@@ -29,8 +29,9 @@
 
     ~GrMtlGpuTextureCommandBuffer() override {}
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override {
-        fGpu->copySurface(fTexture, src, srcRect, dstPoint);
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override {
+        fGpu->copySurface(fTexture, fOrigin, src, srcOrigin, srcRect, dstPoint);
     }
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override {
@@ -67,7 +68,8 @@
     }
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override;
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override;
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override;
 
     void submit();
 
diff --git a/src/gpu/mtl/GrMtlGpuCommandBuffer.mm b/src/gpu/mtl/GrMtlGpuCommandBuffer.mm
index 8dc4c70..7354fbe 100644
--- a/src/gpu/mtl/GrMtlGpuCommandBuffer.mm
+++ b/src/gpu/mtl/GrMtlGpuCommandBuffer.mm
@@ -92,12 +92,12 @@
     fGpu->submitIndirectCommandBuffer(fRenderTarget, fOrigin, &iBounds);
 }
 
-void GrMtlGpuRTCommandBuffer::copy(GrSurface* src, const SkIRect& srcRect,
-const SkIPoint& dstPoint) {
+void GrMtlGpuRTCommandBuffer::copy(GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                   const SkIRect& srcRect, const SkIPoint& dstPoint) {
     // We cannot have an active encoder when we call copy since it requires its own
     // command encoder.
     SkASSERT(nil == fActiveRenderCmdEncoder);
-    fGpu->copySurface(fRenderTarget, src, srcRect, dstPoint);
+    fGpu->copySurface(fRenderTarget, fOrigin, src, srcOrigin, srcRect, dstPoint);
 }
 
 void GrMtlGpuRTCommandBuffer::transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
diff --git a/src/gpu/mtl/GrMtlResourceProvider.h b/src/gpu/mtl/GrMtlResourceProvider.h
index cf19c89..ed20e59 100644
--- a/src/gpu/mtl/GrMtlResourceProvider.h
+++ b/src/gpu/mtl/GrMtlResourceProvider.h
@@ -10,6 +10,7 @@
 
 #include "include/private/SkTArray.h"
 #include "src/core/SkLRUCache.h"
+#include "src/gpu/mtl/GrMtlCopyPipelineState.h"
 #include "src/gpu/mtl/GrMtlDepthStencil.h"
 #include "src/gpu/mtl/GrMtlPipelineStateBuilder.h"
 #include "src/gpu/mtl/GrMtlSampler.h"
@@ -22,6 +23,11 @@
 public:
     GrMtlResourceProvider(GrMtlGpu* gpu);
 
+    GrMtlCopyPipelineState* findOrCreateCopyPipelineState(MTLPixelFormat dstPixelFormat,
+                                                          id<MTLFunction> vertexFunction,
+                                                          id<MTLFunction> fragmentFunction,
+                                                          MTLVertexDescriptor* vertexDescriptor);
+
     GrMtlPipelineState* findOrCreateCompatiblePipelineState(
         GrRenderTarget*, GrSurfaceOrigin,
         const GrPipeline&,
@@ -83,6 +89,8 @@
 #endif
     };
 
+    SkTArray<std::unique_ptr<GrMtlCopyPipelineState>> fCopyPipelineStateCache;
+
     GrMtlGpu* fGpu;
 
     // Cache of GrMtlPipelineStates
diff --git a/src/gpu/mtl/GrMtlResourceProvider.mm b/src/gpu/mtl/GrMtlResourceProvider.mm
index f447984..e3a63c7 100644
--- a/src/gpu/mtl/GrMtlResourceProvider.mm
+++ b/src/gpu/mtl/GrMtlResourceProvider.mm
@@ -8,6 +8,7 @@
 #include "src/gpu/mtl/GrMtlResourceProvider.h"
 
 #include "src/gpu/mtl/GrMtlCommandBuffer.h"
+#include "src/gpu/mtl/GrMtlCopyManager.h"
 #include "src/gpu/mtl/GrMtlGpu.h"
 #include "src/gpu/mtl/GrMtlPipelineState.h"
 #include "src/gpu/mtl/GrMtlUtil.h"
@@ -24,6 +25,23 @@
     fPipelineStateCache.reset(new PipelineStateCache(gpu));
 }
 
+GrMtlCopyPipelineState* GrMtlResourceProvider::findOrCreateCopyPipelineState(
+        MTLPixelFormat dstPixelFormat, id<MTLFunction> vertexFunction,
+        id<MTLFunction> fragmentFunction, MTLVertexDescriptor* vertexDescriptor) {
+    // Return the cached copy pipeline for dstPixelFormat, creating and caching it on a miss.
+    for (const auto& copyPipelineState: fCopyPipelineStateCache) {
+        if (GrMtlCopyManager::IsCompatible(copyPipelineState.get(), dstPixelFormat)) {
+            return copyPipelineState.get();
+        }
+    }
+    // Creation can fail and return null. Never cache a null entry, since later lookups would
+    // pass it to IsCompatible(); report the failure to the caller instead.
+    std::unique_ptr<GrMtlCopyPipelineState> created(
+            GrMtlCopyPipelineState::CreateCopyPipelineState(
+                    fGpu, dstPixelFormat, vertexFunction, fragmentFunction, vertexDescriptor));
+    return created ? fCopyPipelineStateCache.emplace_back(std::move(created)).get() : nullptr;
+}
+
 GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
         GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
         const GrPipeline& pipeline, const GrPrimitiveProcessor& proc,
diff --git a/src/gpu/ops/GrCopySurfaceOp.cpp b/src/gpu/ops/GrCopySurfaceOp.cpp
index 0dfcc29..df64f3a 100644
--- a/src/gpu/ops/GrCopySurfaceOp.cpp
+++ b/src/gpu/ops/GrCopySurfaceOp.cpp
@@ -11,7 +11,57 @@
 #include "src/gpu/GrGpu.h"
 #include "src/gpu/GrMemoryPool.h"
 #include "src/gpu/GrRecordingContextPriv.h"
-#include "src/gpu/geometry/GrRect.h"
+
+// Clips srcRect/dstPoint to the bounds of src and dst. Returns false if nothing is left to copy.
+static bool clip_src_rect_and_dst_point(const GrSurfaceProxy* dst,
+                                        const GrSurfaceProxy* src,
+                                        const SkIRect& srcRect,
+                                        const SkIPoint& dstPoint,
+                                        SkIRect* clippedSrcRect,
+                                        SkIPoint* clippedDstPoint) {
+    *clippedSrcRect = srcRect;
+    *clippedDstPoint = dstPoint;
+
+    // Left edge: a negative src x shifts dstPoint right; a negative dst x shifts src's left edge.
+    if (clippedSrcRect->fLeft < 0) {
+        clippedDstPoint->fX -= clippedSrcRect->fLeft;
+        clippedSrcRect->fLeft = 0;
+    }
+    if (clippedDstPoint->fX < 0) {
+        clippedSrcRect->fLeft -= clippedDstPoint->fX;
+        clippedDstPoint->fX = 0;
+    }
+
+    // Top edge: the same adjustment as for the left edge, applied in y.
+    if (clippedSrcRect->fTop < 0) {
+        clippedDstPoint->fY -= clippedSrcRect->fTop;
+        clippedSrcRect->fTop = 0;
+    }
+    if (clippedDstPoint->fY < 0) {
+        clippedSrcRect->fTop -= clippedDstPoint->fY;
+        clippedDstPoint->fY = 0;
+    }
+
+    // Right edge: trim to src's width, then to the width still available in dst at dstPoint.
+    if (clippedSrcRect->fRight > src->width()) {
+        clippedSrcRect->fRight = src->width();
+    }
+    if (clippedDstPoint->fX + clippedSrcRect->width() > dst->width()) {
+        clippedSrcRect->fRight = clippedSrcRect->fLeft + dst->width() - clippedDstPoint->fX;
+    }
+
+    // Bottom edge: trim to src's height, then to the height still available in dst at dstPoint.
+    if (clippedSrcRect->fBottom > src->height()) {
+        clippedSrcRect->fBottom = src->height();
+    }
+    if (clippedDstPoint->fY + clippedSrcRect->height() > dst->height()) {
+        clippedSrcRect->fBottom = clippedSrcRect->fTop + dst->height() - clippedDstPoint->fY;
+    }
+
+    // The clipping above may have inverted the rect if it did not intersect the src or dst
+    // bounds; in that case there is nothing to copy and false is returned.
+    return !clippedSrcRect->isEmpty();
+}
 
 std::unique_ptr<GrOp> GrCopySurfaceOp::Make(GrRecordingContext* context,
                                             GrSurfaceProxy* dstProxy,
@@ -23,54 +73,22 @@
     SkIRect clippedSrcRect;
     SkIPoint clippedDstPoint;
     // If the rect is outside the srcProxy or dstProxy then we've already succeeded.
-    if (!GrClipSrcRectAndDstPoint(dstProxy->isize(), srcProxy->isize(), srcRect, dstPoint,
-                                  &clippedSrcRect, &clippedDstPoint)) {
+    if (!clip_src_rect_and_dst_point(dstProxy, srcProxy, srcRect, dstPoint,
+                                     &clippedSrcRect, &clippedDstPoint)) {
         return nullptr;
     }
     if (GrPixelConfigIsCompressed(dstProxy->config())) {
         return nullptr;
     }
 
-    SkASSERT(dstProxy->origin() == srcProxy->origin());
-    SkIRect adjSrcRect;
-    adjSrcRect.fLeft = clippedSrcRect.fLeft;
-    adjSrcRect.fRight = clippedSrcRect.fRight;
-    SkIPoint adjDstPoint;
-    adjDstPoint.fX = clippedDstPoint.fX;
-
-    // If it is bottom left origin we must flip the rects.
-    SkASSERT(dstProxy->origin() == srcProxy->origin());
-    if (kBottomLeft_GrSurfaceOrigin == srcProxy->origin()) {
-        adjSrcRect.fTop = srcProxy->height() - clippedSrcRect.fBottom;
-        adjSrcRect.fBottom = srcProxy->height() - clippedSrcRect.fTop;
-        adjDstPoint.fY = dstProxy->height() - clippedDstPoint.fY - clippedSrcRect.height();
-    } else {
-        adjSrcRect.fTop = clippedSrcRect.fTop;
-        adjSrcRect.fBottom = clippedSrcRect.fBottom;
-        adjDstPoint.fY = clippedDstPoint.fY;
-    }
-
     GrOpMemoryPool* pool = context->priv().opMemoryPool();
 
-    return pool->allocate<GrCopySurfaceOp>(srcProxy, dstProxy, adjSrcRect, adjDstPoint);
+    return pool->allocate<GrCopySurfaceOp>(srcProxy, clippedSrcRect, clippedDstPoint);
 }
 
 void GrCopySurfaceOp::onExecute(GrOpFlushState* state, const SkRect& chainBounds) {
     SkASSERT(fSrc.get()->isInstantiated());
 
-    // If we are using approx surfaces we may need to adjust our srcRect or dstPoint if the origin
-    // is bottom left.
-    GrSurfaceProxy* src = fSrc.get();
-    if (src->origin() == kBottomLeft_GrSurfaceOrigin) {
-        GrSurfaceProxy* dst = fDst.get();
-        SkASSERT(dst->isInstantiated());
-        if (src->height() != src->peekSurface()->height()) {
-            fSrcRect.offset(0, src->peekSurface()->height() - src->height());
-        }
-        if (dst->height() != dst->peekSurface()->height()) {
-            fDstPoint.fY = fDstPoint.fY + (dst->peekSurface()->height() - dst->height());
-        }
-    }
-
-    state->commandBuffer()->copy(fSrc.get()->peekSurface(), fSrcRect, fDstPoint);
+    state->commandBuffer()->copy(fSrc.get()->peekSurface(), fSrc.get()->origin(), fSrcRect,
+                                 fDstPoint);
 }
diff --git a/src/gpu/ops/GrCopySurfaceOp.h b/src/gpu/ops/GrCopySurfaceOp.h
index 58e196b..794a7f5 100644
--- a/src/gpu/ops/GrCopySurfaceOp.h
+++ b/src/gpu/ops/GrCopySurfaceOp.h
@@ -45,11 +45,9 @@
 private:
     friend class GrOpMemoryPool; // for ctor
 
-    GrCopySurfaceOp(GrSurfaceProxy* src, GrSurfaceProxy* dst, const SkIRect& srcRect,
-                    const SkIPoint& dstPoint)
+    GrCopySurfaceOp(GrSurfaceProxy* src, const SkIRect& srcRect, const SkIPoint& dstPoint)
             : INHERITED(ClassID())
             , fSrc(src)
-            , fDst(dst)
             , fSrcRect(srcRect)
             , fDstPoint(dstPoint) {
         SkRect bounds =
@@ -63,7 +61,6 @@
     void onExecute(GrOpFlushState*, const SkRect& chainBounds) override;
 
     GrPendingIOResource<GrSurfaceProxy, kRead_GrIOType>  fSrc;
-    GrPendingIOResource<GrSurfaceProxy, kWrite_GrIOType> fDst;
     SkIRect                                              fSrcRect;
     SkIPoint                                             fDstPoint;
 
diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp
index 976df7f..4fa5dc5 100644
--- a/src/gpu/vk/GrVkCaps.cpp
+++ b/src/gpu/vk/GrVkCaps.cpp
@@ -54,7 +54,8 @@
 }
 
 bool GrVkCaps::initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
-                                  bool* rectsMustMatch, bool* disallowSubrect) const {
+                                  GrSurfaceOrigin* origin, bool* rectsMustMatch,
+                                  bool* disallowSubrect) const {
     // Vk doesn't use rectsMustMatch or disallowSubrect. Always return false.
     *rectsMustMatch = false;
     *disallowSubrect = false;
@@ -62,6 +63,7 @@
     // We can always succeed here with either a CopyImage (none msaa src) or ResolveImage (msaa).
     // For CopyImage we can make a simple texture, for ResolveImage we require the dst to be a
     // render target as well.
+    *origin = src->origin();
     desc->fConfig = src->config();
     if (src->numColorSamples() > 1 || src->asTextureProxy()) {
         desc->fFlags = kRenderTarget_GrSurfaceFlag;
@@ -113,8 +115,9 @@
     return 0;
 }
 
-bool GrVkCaps::canCopyImage(GrPixelConfig dstConfig, int dstSampleCnt, bool dstHasYcbcr,
-                            GrPixelConfig srcConfig, int srcSampleCnt, bool srcHasYcbcr) const {
+bool GrVkCaps::canCopyImage(GrPixelConfig dstConfig, int dstSampleCnt, GrSurfaceOrigin dstOrigin,
+                            bool dstHasYcbcr, GrPixelConfig srcConfig, int srcSampleCnt,
+                            GrSurfaceOrigin srcOrigin, bool srcHasYcbcr) const {
     if ((dstSampleCnt > 1 || srcSampleCnt > 1) && dstSampleCnt != srcSampleCnt) {
         return false;
     }
@@ -125,7 +128,8 @@
 
     // We require that all vulkan GrSurfaces have been created with transfer_dst and transfer_src
     // as image usage flags.
-    if (get_compatible_format_class(srcConfig) != get_compatible_format_class(dstConfig)) {
+    if (srcOrigin != dstOrigin ||
+        get_compatible_format_class(srcConfig) != get_compatible_format_class(dstConfig)) {
         return false;
     }
 
@@ -170,8 +174,10 @@
     return true;
 }
 
-bool GrVkCaps::canCopyAsResolve(GrPixelConfig dstConfig, int dstSampleCnt, bool dstHasYcbcr,
-                                GrPixelConfig srcConfig, int srcSampleCnt, bool srcHasYcbcr) const {
+bool GrVkCaps::canCopyAsResolve(GrPixelConfig dstConfig, int dstSampleCnt,
+                                GrSurfaceOrigin dstOrigin, bool dstHasYcbcr,
+                                GrPixelConfig srcConfig, int srcSampleCnt,
+                                GrSurfaceOrigin srcOrigin, bool srcHasYcbcr) const {
     // The src surface must be multisampled.
     if (srcSampleCnt <= 1) {
         return false;
@@ -187,6 +193,11 @@
         return false;
     }
 
+    // Surfaces must have the same origin.
+    if (srcOrigin != dstOrigin) {
+        return false;
+    }
+
     if (dstHasYcbcr || srcHasYcbcr) {
         return false;
     }
@@ -194,8 +205,32 @@
     return true;
 }
 
+bool GrVkCaps::canCopyAsDraw(GrPixelConfig dstConfig, bool dstIsRenderable, bool dstHasYcbcr,
+                             GrPixelConfig srcConfig, bool srcIsTextureable,
+                             bool srcHasYcbcr) const {
+    // Decides whether a copy between surfaces with these properties may be done as a draw.
+    // srcHasYcbcr is accepted but has no influence on the result.
+    const auto* caps = this->shaderCaps();
+
+    // TODO: Make copySurfaceAsDraw handle the swizzle
+    const bool swizzleDiffers =
+            caps->configOutputSwizzle(srcConfig) != caps->configOutputSwizzle(dstConfig);
+    if (swizzleDiffers) {
+        return false;
+    }
+
+    // The dst has to be a render target and the src has to be a texture.
+    if (!(dstIsRenderable && srcIsTextureable)) {
+        return false;
+    }
+    return !dstHasYcbcr;  // ...and the dst must not have Ycbcr set.
+}
+
 bool GrVkCaps::onCanCopySurface(const GrSurfaceProxy* dst, const GrSurfaceProxy* src,
                                 const SkIRect& srcRect, const SkIPoint& dstPoint) const {
+    GrSurfaceOrigin dstOrigin = dst->origin();
+    GrSurfaceOrigin srcOrigin = src->origin();
+
     GrPixelConfig dstConfig = dst->config();
     GrPixelConfig srcConfig = src->config();
 
@@ -243,12 +278,14 @@
         }
     }
 
-    return this->canCopyImage(dstConfig, dstSampleCnt, dstHasYcbcr,
-                              srcConfig, srcSampleCnt, srcHasYcbcr) ||
+    return this->canCopyImage(dstConfig, dstSampleCnt, dstOrigin, dstHasYcbcr,
+                              srcConfig, srcSampleCnt, srcOrigin, srcHasYcbcr) ||
            this->canCopyAsBlit(dstConfig, dstSampleCnt, dstIsLinear, dstHasYcbcr,
                                srcConfig, srcSampleCnt, srcIsLinear, srcHasYcbcr) ||
-           this->canCopyAsResolve(dstConfig, dstSampleCnt, dstHasYcbcr,
-                                  srcConfig, srcSampleCnt, srcHasYcbcr);
+           this->canCopyAsResolve(dstConfig, dstSampleCnt, dstOrigin, dstHasYcbcr,
+                                  srcConfig, srcSampleCnt, srcOrigin, srcHasYcbcr) ||
+           this->canCopyAsDraw(dstConfig, dstSampleCnt > 0, dstHasYcbcr,
+                               srcConfig, SkToBool(src->asTextureProxy()), srcHasYcbcr);
 }
 
 template<typename T> T* get_extension_feature_struct(const VkPhysicalDeviceFeatures2& features,
diff --git a/src/gpu/vk/GrVkCaps.h b/src/gpu/vk/GrVkCaps.h
index d795bb3..9fa324c 100644
--- a/src/gpu/vk/GrVkCaps.h
+++ b/src/gpu/vk/GrVkCaps.h
@@ -139,17 +139,22 @@
      * the surface is not a render target, otherwise it is the number of samples in the render
      * target.
      */
-    bool canCopyImage(GrPixelConfig dstConfig, int dstSampleCnt, bool dstHasYcbcr,
-                      GrPixelConfig srcConfig, int srcSamplecnt, bool srcHasYcbcr) const;
+    bool canCopyImage(GrPixelConfig dstConfig, int dstSampleCnt, GrSurfaceOrigin dstOrigin,
+                      bool dstHasYcbcr, GrPixelConfig srcConfig, int srcSamplecnt,
+                      GrSurfaceOrigin srcOrigin, bool srcHasYcbcr) const;
 
     bool canCopyAsBlit(GrPixelConfig dstConfig, int dstSampleCnt, bool dstIsLinear,
                        bool dstHasYcbcr, GrPixelConfig srcConfig, int srcSampleCnt,
                        bool srcIsLinear, bool srcHasYcbcr) const;
 
-    bool canCopyAsResolve(GrPixelConfig dstConfig, int dstSampleCnt, bool dstHasYcbcr,
-                          GrPixelConfig srcConfig, int srcSamplecnt, bool srcHasYcbcr) const;
+    bool canCopyAsResolve(GrPixelConfig dstConfig, int dstSampleCnt, GrSurfaceOrigin dstOrigin,
+                          bool dstHasYcbcr, GrPixelConfig srcConfig, int srcSamplecnt,
+                          GrSurfaceOrigin srcOrigin, bool srcHasYcbcr) const;
 
-    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc,
+    bool canCopyAsDraw(GrPixelConfig dstConfig, bool dstIsRenderable, bool dstHasYcbcr,
+                       GrPixelConfig srcConfig, bool srcIsTextureable, bool srcHasYcbcr) const;
+
+    bool initDescForDstCopy(const GrRenderTargetProxy* src, GrSurfaceDesc* desc, GrSurfaceOrigin*,
                             bool* rectsMustMatch, bool* disallowSubrect) const override;
 
     GrPixelConfig validateBackendRenderTarget(const GrBackendRenderTarget&,
diff --git a/src/gpu/vk/GrVkCopyManager.cpp b/src/gpu/vk/GrVkCopyManager.cpp
new file mode 100644
index 0000000..ce9a3f9
--- /dev/null
+++ b/src/gpu/vk/GrVkCopyManager.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+*/
+
+#include "src/gpu/vk/GrVkCopyManager.h"
+
+#include "include/core/SkPoint.h"
+#include "include/core/SkRect.h"
+#include "include/gpu/GrSamplerState.h"
+#include "include/gpu/GrSurface.h"
+#include "src/core/SkTraceEvent.h"
+#include "src/gpu/GrRenderTargetPriv.h"
+#include "src/gpu/GrShaderCaps.h"
+#include "src/gpu/GrTexturePriv.h"
+#include "src/gpu/vk/GrVkCommandBuffer.h"
+#include "src/gpu/vk/GrVkCommandPool.h"
+#include "src/gpu/vk/GrVkCopyPipeline.h"
+#include "src/gpu/vk/GrVkDescriptorSet.h"
+#include "src/gpu/vk/GrVkGpu.h"
+#include "src/gpu/vk/GrVkImageView.h"
+#include "src/gpu/vk/GrVkPipelineLayout.h"
+#include "src/gpu/vk/GrVkRenderTarget.h"
+#include "src/gpu/vk/GrVkResourceProvider.h"
+#include "src/gpu/vk/GrVkSampler.h"
+#include "src/gpu/vk/GrVkTexture.h"
+#include "src/gpu/vk/GrVkUniformBuffer.h"
+#include "src/gpu/vk/GrVkVertexBuffer.h"
+
+GrVkCopyManager::GrVkCopyManager()
+    : fVertShaderModule(VK_NULL_HANDLE)
+    , fFragShaderModule(VK_NULL_HANDLE)
+    , fPipelineLayout(nullptr) {}  // nothing is created until createCopyProgram() runs
+
+GrVkCopyManager::~GrVkCopyManager() {}  // empty; Vulkan handles are freed in destroyResources()
+
+// Builds the state shared by all copy draws: shader modules, pipeline layout, and buffers.
+// Calls destroyResources() and returns false if a shader compile or layout creation fails.
+bool GrVkCopyManager::createCopyProgram(GrVkGpu* gpu) {
+    TRACE_EVENT0("skia", TRACE_FUNC);
+
+    const GrShaderCaps* shaderCaps = gpu->caps()->shaderCaps();
+    const char* version = shaderCaps->versionDeclString();
+    SkSL::String vertShaderText(version);
+    vertShaderText.append(
+        "#extension GL_ARB_separate_shader_objects : enable\n"
+        "#extension GL_ARB_shading_language_420pack : enable\n"
+
+        "layout(set = 0, binding = 0) uniform vertexUniformBuffer {"
+            "half4 uPosXform;"
+            "half4 uTexCoordXform;"
+        "};"
+        "layout(location = 0) in float2 inPosition;"
+        "layout(location = 1) out half2 vTexCoord;"
+
+        "// Copy Program VS\n"
+        "void main() {"
+            "vTexCoord = half2(inPosition * uTexCoordXform.xy + uTexCoordXform.zw);"
+            "sk_Position.xy = inPosition * uPosXform.xy + uPosXform.zw;"
+            "sk_Position.zw = half2(0, 1);"
+        "}"
+    );
+
+    SkSL::String fragShaderText(version);
+    fragShaderText.append(
+        "#extension GL_ARB_separate_shader_objects : enable\n"
+        "#extension GL_ARB_shading_language_420pack : enable\n"
+
+        "layout(set = 1, binding = 0) uniform sampler2D uTextureSampler;"
+        "layout(location = 1) in half2 vTexCoord;"
+
+        "// Copy Program FS\n"
+        "void main() {"
+            "sk_FragColor = texture(uTextureSampler, vTexCoord);"
+        "}"
+    );
+
+    SkSL::Program::Settings settings;
+    SkSL::String spirv;
+    SkSL::Program::Inputs inputs;
+    if (!GrCompileVkShaderModule(gpu, vertShaderText, VK_SHADER_STAGE_VERTEX_BIT,
+                                 &fVertShaderModule, &fShaderStageInfo[0], settings, &spirv,
+                                 &inputs)) {
+        this->destroyResources(gpu);
+        return false;
+    }
+    SkASSERT(inputs.isEmpty());
+
+    if (!GrCompileVkShaderModule(gpu, fragShaderText, VK_SHADER_STAGE_FRAGMENT_BIT,
+                                 &fFragShaderModule, &fShaderStageInfo[1], settings, &spirv,
+                                 &inputs)) {
+        this->destroyResources(gpu);
+        return false;
+    }
+    SkASSERT(inputs.isEmpty());
+
+    VkDescriptorSetLayout dsLayout[2];
+    GrVkResourceProvider& resourceProvider = gpu->resourceProvider();
+    dsLayout[GrVkUniformHandler::kUniformBufferDescSet] = resourceProvider.getUniformDSLayout();
+
+    uint32_t samplerVisibility = kFragment_GrShaderFlag;
+    SkTArray<uint32_t> visibilityArray(&samplerVisibility, 1);
+
+    resourceProvider.getSamplerDescriptorSetHandle(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                                                   visibilityArray, &fSamplerDSHandle);
+    dsLayout[GrVkUniformHandler::kSamplerDescSet] =
+        resourceProvider.getSamplerDSLayout(fSamplerDSHandle);
+
+    // Create the VkPipelineLayout. The whole create-info struct is zeroed before it is filled in.
+    VkPipelineLayoutCreateInfo layoutCreateInfo;
+    memset(&layoutCreateInfo, 0, sizeof(VkPipelineLayoutCreateInfo));
+    layoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    layoutCreateInfo.pNext = nullptr;
+    layoutCreateInfo.flags = 0;
+    layoutCreateInfo.setLayoutCount = 2;
+    layoutCreateInfo.pSetLayouts = dsLayout;
+    layoutCreateInfo.pushConstantRangeCount = 0;
+    layoutCreateInfo.pPushConstantRanges = nullptr;
+
+    VkPipelineLayout pipelineLayout;
+    VkResult err = GR_VK_CALL(gpu->vkInterface(), CreatePipelineLayout(gpu->device(),
+                                                                       &layoutCreateInfo,
+                                                                       nullptr,
+                                                                       &pipelineLayout));
+    if (err) {
+        this->destroyResources(gpu);
+        return false;
+    }
+
+    fPipelineLayout = new GrVkPipelineLayout(pipelineLayout);
+
+    static const float vdata[] = {  // (x, y) corners of the unit square
+        0, 0,
+        0, 1,
+        1, 0,
+        1, 1
+    };
+    fVertexBuffer = GrVkVertexBuffer::Make(gpu, sizeof(vdata), false);
+    SkASSERT(fVertexBuffer.get());
+    fVertexBuffer->updateData(vdata, sizeof(vdata));
+
+    // We use 2 float4's for uniforms
+    fUniformBuffer.reset(GrVkUniformBuffer::Create(gpu, 8 * sizeof(float)));
+    SkASSERT(fUniformBuffer.get());
+
+    return true;
+}
+
+// Copies srcRect of src to dstPoint in dst by drawing a textured quad. Returns false on failure.
+bool GrVkCopyManager::copySurfaceAsDraw(GrVkGpu* gpu,
+                                        GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                        GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                        const SkIRect& srcRect, const SkIPoint& dstPoint,
+                                        bool canDiscardOutsideDstRect) {
+    // None of our copy methods can handle a swizzle. TODO: Make copySurfaceAsDraw handle the
+    // swizzle.
+    if (gpu->caps()->shaderCaps()->configOutputSwizzle(src->config()) !=
+        gpu->caps()->shaderCaps()->configOutputSwizzle(dst->config())) {
+        return false;
+    }
+
+    GrVkRenderTarget* rt = static_cast<GrVkRenderTarget*>(dst->asRenderTarget());
+    if (!rt) {
+        return false;
+    }
+
+    GrVkTexture* srcTex = static_cast<GrVkTexture*>(src->asTexture());
+    if (!srcTex) {
+        return false;
+    }
+
+    if (VK_NULL_HANDLE == fVertShaderModule) {
+        SkASSERT(VK_NULL_HANDLE == fFragShaderModule &&
+                 nullptr == fPipelineLayout &&
+                 nullptr == fVertexBuffer.get() &&
+                 nullptr == fUniformBuffer.get());
+        if (!this->createCopyProgram(gpu)) {
+            SkDebugf("Failed to create copy program.\n");
+            return false;
+        }
+    }
+    SkASSERT(fPipelineLayout);
+
+    GrVkResourceProvider& resourceProv = gpu->resourceProvider();
+
+    GrVkCopyPipeline* pipeline = resourceProv.findOrCreateCopyPipeline(rt,
+                                                                       fShaderStageInfo,
+                                                                       fPipelineLayout->layout());
+    if (!pipeline) {
+        return false;
+    }
+
+    // UPDATE UNIFORM DESCRIPTOR SET
+    int w = srcRect.width();
+    int h = srcRect.height();
+
+    // dst rect edges in NDC (-1 to 1)
+    int dw = dst->width();
+    int dh = dst->height();
+    float dx0 = 2.f * dstPoint.fX / dw - 1.f;
+    float dx1 = 2.f * (dstPoint.fX + w) / dw - 1.f;
+    float dy0 = 2.f * dstPoint.fY / dh - 1.f;
+    float dy1 = 2.f * (dstPoint.fY + h) / dh - 1.f;
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {
+        dy0 = -dy0;
+        dy1 = -dy1;
+    }
+
+    float sx0 = (float)srcRect.fLeft;
+    float sx1 = (float)(srcRect.fLeft + w);
+    float sy0 = (float)srcRect.fTop;
+    float sy1 = (float)(srcRect.fTop + h);
+    int sh = src->height();
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        sy0 = sh - sy0;
+        sy1 = sh - sy1;
+    }
+    // src rect edges in normalized texture space (0 to 1).
+    int sw = src->width();
+    sx0 /= sw;
+    sx1 /= sw;
+    sy0 /= sh;
+    sy1 /= sh;
+
+    float uniData[] = { dx1 - dx0, dy1 - dy0, dx0, dy0,    // posXform
+                        sx1 - sx0, sy1 - sy0, sx0, sy0 };  // texCoordXform
+
+    fUniformBuffer->updateData(gpu, uniData, sizeof(uniData), nullptr);
+
+    const GrVkDescriptorSet* uniformDS = resourceProv.getUniformDescriptorSet();
+    SkASSERT(uniformDS);
+
+    VkDescriptorBufferInfo uniBufferInfo;
+    uniBufferInfo.buffer = fUniformBuffer->buffer();
+    uniBufferInfo.offset = fUniformBuffer->offset();
+    uniBufferInfo.range = fUniformBuffer->size();
+
+    VkWriteDescriptorSet descriptorWrites;
+    descriptorWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    descriptorWrites.pNext = nullptr;
+    descriptorWrites.dstSet = uniformDS->descriptorSet();
+    descriptorWrites.dstBinding = GrVkUniformHandler::kGeometryBinding;
+    descriptorWrites.dstArrayElement = 0;
+    descriptorWrites.descriptorCount = 1;
+    descriptorWrites.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    descriptorWrites.pImageInfo = nullptr;
+    descriptorWrites.pBufferInfo = &uniBufferInfo;
+    descriptorWrites.pTexelBufferView = nullptr;
+
+    GR_VK_CALL(gpu->vkInterface(), UpdateDescriptorSets(gpu->device(),
+                                                        1,
+                                                        &descriptorWrites,
+                                                        0, nullptr));
+
+    // UPDATE SAMPLER DESCRIPTOR SET
+    const GrVkDescriptorSet* samplerDS =
+        gpu->resourceProvider().getSamplerDescriptorSet(fSamplerDSHandle);
+
+    GrSamplerState samplerState = GrSamplerState::ClampNearest();
+
+    GrVkSampler* sampler = resourceProv.findOrCreateCompatibleSampler(
+            samplerState, GrVkYcbcrConversionInfo());
+
+    VkDescriptorImageInfo imageInfo;
+    memset(&imageInfo, 0, sizeof(VkDescriptorImageInfo));
+    imageInfo.sampler = sampler->sampler();
+    imageInfo.imageView = srcTex->textureView()->imageView();
+    imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+    VkWriteDescriptorSet writeInfo;
+    memset(&writeInfo, 0, sizeof(VkWriteDescriptorSet));
+    writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    writeInfo.pNext = nullptr;
+    writeInfo.dstSet = samplerDS->descriptorSet();
+    writeInfo.dstBinding = 0;
+    writeInfo.dstArrayElement = 0;
+    writeInfo.descriptorCount = 1;
+    writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    writeInfo.pImageInfo = &imageInfo;
+    writeInfo.pBufferInfo = nullptr;
+    writeInfo.pTexelBufferView = nullptr;
+
+    GR_VK_CALL(gpu->vkInterface(), UpdateDescriptorSets(gpu->device(),
+                                                        1,
+                                                        &writeInfo,
+                                                        0, nullptr));
+
+    VkDescriptorSet vkDescSets[] = { uniformDS->descriptorSet(), samplerDS->descriptorSet() };
+
+    GrVkRenderTarget* texRT = static_cast<GrVkRenderTarget*>(srcTex->asRenderTarget());
+    if (texRT) {
+        gpu->resolveRenderTargetNoFlush(texRT);
+    }
+
+    // TODO: Make tighter bounds and then adjust bounds for origin and granularity if we see
+    //       any perf issues with using the whole bounds
+    SkIRect bounds = SkIRect::MakeWH(rt->width(), rt->height());
+
+    // Change layouts of rt and texture. We aren't blending so we don't need color attachment read
+    // access for blending.
+    GrVkImage* targetImage = rt->msaaImage() ? rt->msaaImage() : rt;
+    VkAccessFlags dstAccessFlags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    if (!canDiscardOutsideDstRect) {
+        // We need to load the color attachment so need to be able to read it.
+        dstAccessFlags |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+    }
+    targetImage->setImageLayout(gpu,
+                                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                                dstAccessFlags,
+                                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                                false);
+
+    srcTex->setImageLayout(gpu,
+                           VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                           VK_ACCESS_SHADER_READ_BIT,
+                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                           false);
+
+    GrStencilAttachment* stencil = rt->renderTargetPriv().getStencilAttachment();
+    if (stencil) {
+        GrVkStencilAttachment* vkStencil = (GrVkStencilAttachment*)stencil;
+        // We aren't actually using the stencil but we still load and store it so we need
+        // appropriate barriers.
+        // TODO: Once we refactor surface and how we connect stencil to RTs, we should not even
+        // have the stencil on this render pass if possible.
+        vkStencil->setImageLayout(gpu,
+                                  VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+                                  VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+                                  false);
+    }
+
+    VkAttachmentLoadOp loadOp = canDiscardOutsideDstRect ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
+                                                         : VK_ATTACHMENT_LOAD_OP_LOAD;
+    GrVkRenderPass::LoadStoreOps vkColorOps(loadOp, VK_ATTACHMENT_STORE_OP_STORE);
+    GrVkRenderPass::LoadStoreOps vkStencilOps(VK_ATTACHMENT_LOAD_OP_LOAD,
+                                              VK_ATTACHMENT_STORE_OP_STORE);
+    const GrVkRenderPass* renderPass;
+    const GrVkResourceProvider::CompatibleRPHandle& rpHandle = rt->compatibleRenderPassHandle();
+    if (rpHandle.isValid()) {
+        renderPass = gpu->resourceProvider().findRenderPass(rpHandle,
+                                                            vkColorOps,
+                                                            vkStencilOps);
+    } else {
+        renderPass = gpu->resourceProvider().findRenderPass(*rt,
+                                                            vkColorOps,
+                                                            vkStencilOps);
+    }
+
+    SkASSERT(renderPass->isCompatible(*rt->simpleRenderPass()));
+
+    // Get the secondary buffer before beginning the render pass so failure leaves no pass open.
+    GrVkSecondaryCommandBuffer* secondary = gpu->cmdPool()->findOrCreateSecondaryCommandBuffer(gpu);
+    const bool haveSecondary = (nullptr != secondary);
+    if (haveSecondary) {
+        GrVkPrimaryCommandBuffer* cmdBuffer = gpu->currentCommandBuffer();
+        cmdBuffer->beginRenderPass(gpu, renderPass, nullptr, *rt, bounds, true);
+        secondary->begin(gpu, rt->framebuffer(), renderPass);
+
+        secondary->bindPipeline(gpu, pipeline);
+
+        // Uniform DescriptorSet, Sampler DescriptorSet, and vertex shader uniformBuffer
+        SkSTArray<3, const GrVkRecycledResource*> descriptorRecycledResources;
+        descriptorRecycledResources.push_back(uniformDS);
+        descriptorRecycledResources.push_back(samplerDS);
+        descriptorRecycledResources.push_back(fUniformBuffer->resource());
+
+        // One sampler, texture view, and texture
+        SkSTArray<3, const GrVkResource*> descriptorResources;
+        descriptorResources.push_back(sampler);
+        descriptorResources.push_back(srcTex->textureView());
+        descriptorResources.push_back(srcTex->resource());
+
+        secondary->bindDescriptorSets(gpu,
+                                      descriptorRecycledResources,
+                                      descriptorResources,
+                                      fPipelineLayout,
+                                      0,
+                                      2,
+                                      vkDescSets,
+                                      0,
+                                      nullptr);
+
+        // Set Dynamic viewport and stencil
+        // We always use one viewport the size of the RT
+        VkViewport viewport;
+        viewport.x = 0.0f;
+        viewport.y = 0.0f;
+        viewport.width = SkIntToScalar(rt->width());
+        viewport.height = SkIntToScalar(rt->height());
+        viewport.minDepth = 0.0f;
+        viewport.maxDepth = 1.0f;
+        secondary->setViewport(gpu, 0, 1, &viewport);
+
+        // We assume the scissor is not enabled so just set it to the whole RT
+        VkRect2D scissor;
+        scissor.extent.width = rt->width();
+        scissor.extent.height = rt->height();
+        scissor.offset.x = 0;
+        scissor.offset.y = 0;
+        secondary->setScissor(gpu, 0, 1, &scissor);
+
+        secondary->bindInputBuffer(gpu, 0, fVertexBuffer.get());
+        secondary->draw(gpu, 4, 1, 0, 0);
+        secondary->end(gpu);
+        cmdBuffer->executeCommands(gpu, secondary);
+        cmdBuffer->endRenderPass(gpu);
+        secondary->unref(gpu);
+    }
+
+    // Release our refs on the temp resources. If we drew, the cmd buffer should now hold its own.
+    pipeline->unref(gpu);
+    uniformDS->unref(gpu);
+    samplerDS->unref(gpu);
+    sampler->unref(gpu);
+    renderPass->unref(gpu);
+
+    return haveSecondary;
+}
+
+// Destroys the shader modules, pipeline layout, and uniform buffer, clearing each member.
+void GrVkCopyManager::destroyResources(GrVkGpu* gpu) {
+    // Destroys one shader module if it exists, then clears the stored handle.
+    const auto destroyModule = [gpu](VkShaderModule* module) {
+        if (VK_NULL_HANDLE == *module) {
+            return;
+        }
+        GR_VK_CALL(gpu->vkInterface(), DestroyShaderModule(gpu->device(), *module, nullptr));
+        *module = VK_NULL_HANDLE;
+    };
+    destroyModule(&fVertShaderModule);
+    destroyModule(&fFragShaderModule);
+
+    if (nullptr != fPipelineLayout) {
+        fPipelineLayout->unref(gpu);
+        fPipelineLayout = nullptr;
+    }
+
+    if (fUniformBuffer.get()) {
+        fUniformBuffer->release(gpu);
+        fUniformBuffer.reset();
+    }
+}
+
+// Forgets the shader module handles and abandons the layout and uniform buffer (no GrVkGpu used).
+void GrVkCopyManager::abandonResources() {
+    fVertShaderModule = fFragShaderModule = VK_NULL_HANDLE;
+    if (nullptr != fPipelineLayout) {
+        fPipelineLayout->unrefAndAbandon();
+        fPipelineLayout = nullptr;
+    }
+
+    if (fUniformBuffer.get()) {
+        fUniformBuffer->abandon();
+        fUniformBuffer.reset();
+    }
+}
diff --git a/src/gpu/vk/GrVkCopyManager.h b/src/gpu/vk/GrVkCopyManager.h
new file mode 100644
index 0000000..e4f02b6
--- /dev/null
+++ b/src/gpu/vk/GrVkCopyManager.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+*/
+
+#ifndef GrVkCopyManager_DEFINED
+#define GrVkCopyManager_DEFINED
+
+#include "include/gpu/GrTypes.h"
+#include "include/gpu/vk/GrVkTypes.h"
+#include "src/gpu/vk/GrVkDescriptorSetManager.h"
+
+class GrSurface;
+class GrVkCopyPipeline;
+class GrVkGpu;
+class GrVkPipelineLayout;
+class GrVkUniformBuffer;
+class GrVkVertexBuffer;
+struct SkIPoint;
+struct SkIRect;
+
+class GrVkCopyManager {  // Performs Vulkan surface copies by issuing a draw.
+public:
+    GrVkCopyManager();
+    ~GrVkCopyManager();
+
+    // Draws srcRect of src into dst at dstPoint. Returns false if the copy was not performed.
+    bool copySurfaceAsDraw(GrVkGpu* gpu,
+                           GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                           const SkIRect& srcRect, const SkIPoint& dstPoint,
+                           bool canDiscardOutsideDstRect);
+
+    void destroyResources(GrVkGpu* gpu);  // destroys the owned Vulkan objects through gpu
+    void abandonResources();              // drops the owned objects without a gpu
+
+private:
+    bool createCopyProgram(GrVkGpu* gpu);  // one-time setup, run by copySurfaceAsDraw
+
+    // Everything below is only created once and shared by all copy draws/pipelines
+    VkShaderModule fVertShaderModule;
+    VkShaderModule fFragShaderModule;
+    VkPipelineShaderStageCreateInfo fShaderStageInfo[2];  // [0] vertex stage, [1] fragment stage
+
+    GrVkDescriptorSetManager::Handle fSamplerDSHandle;
+    GrVkPipelineLayout* fPipelineLayout;  // unref'd in destroyResources()/abandonResources()
+
+    sk_sp<GrVkVertexBuffer> fVertexBuffer;              // unit-square vertices for the copy quad
+    std::unique_ptr<GrVkUniformBuffer> fUniformBuffer;  // holds uPosXform and uTexCoordXform
+};
+
+#endif
diff --git a/src/gpu/vk/GrVkCopyPipeline.cpp b/src/gpu/vk/GrVkCopyPipeline.cpp
new file mode 100644
index 0000000..3b44f61
--- /dev/null
+++ b/src/gpu/vk/GrVkCopyPipeline.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/vk/GrVkCopyPipeline.h"
+
+#include "include/private/SkOnce.h"
+#include "src/gpu/vk/GrVkGpu.h"
+#include "src/gpu/vk/GrVkUtil.h"
+
+#if defined(SK_ENABLE_SCOPED_LSAN_SUPPRESSIONS)
+#include <sanitizer/lsan_interface.h>
+#endif
+
+static void setup_multisample_state(int numSamples,
+                                    VkPipelineMultisampleStateCreateInfo* multisampleInfo) {
+    VkPipelineMultisampleStateCreateInfo& info = *multisampleInfo;
+    memset(&info, 0, sizeof(info));  // zero the whole struct, then set each field explicitly
+    info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
+    info.pNext = nullptr;
+    info.flags = 0;
+    SkAssertResult(GrSampleCountToVkSampleCount(numSamples, &info.rasterizationSamples));
+    info.sampleShadingEnable = VK_FALSE;
+    info.minSampleShading = 0.0f;
+    info.pSampleMask = nullptr;
+    info.alphaToCoverageEnable = VK_FALSE;
+    info.alphaToOneEnable = VK_FALSE;
+}
+
+GrVkCopyPipeline* GrVkCopyPipeline::Create(GrVkGpu* gpu,
+                                           VkPipelineShaderStageCreateInfo* shaderStageInfo,
+                                           VkPipelineLayout pipelineLayout,
+                                           int numSamples,
+                                           const GrVkRenderPass& renderPass,
+                                           VkPipelineCache cache) {
+    // Everything except the sample count, shader stages, layout, and render pass is constant.
+    static const VkVertexInputAttributeDescription attributeDesc = {
+        0,                        // location
+        0,                        // binding
+        VK_FORMAT_R32G32_SFLOAT,  // format
+        0,                        // offset
+    };
+
+    static const VkVertexInputBindingDescription bindingDesc = {
+        0,                           // binding
+        2 * sizeof(float),           // stride
+        VK_VERTEX_INPUT_RATE_VERTEX  // inputRate
+    };
+
+    static const VkPipelineVertexInputStateCreateInfo vertexInputInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,  // sType
+        nullptr,                                                    // pNext
+        0,                                                          // flags
+        1,                                                          // vertexBindingDescriptionCount
+        &bindingDesc,                                               // pVertexBindingDescriptions
+        1,                                                          // vertexAttributeDescriptionCnt
+        &attributeDesc,                                             // pVertexAttributeDescriptions
+    };
+
+    static const VkPipelineInputAssemblyStateCreateInfo inputAssemblyInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,  // sType
+        nullptr,                                                      // pNext
+        0,                                                            // flags
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,                         // topology
+        VK_FALSE                                                      // primitiveRestartEnable
+    };
+
+    static const VkStencilOpState dummyStencilState = {
+        VK_STENCIL_OP_KEEP,   // failOp
+        VK_STENCIL_OP_KEEP,   // passOp
+        VK_STENCIL_OP_KEEP,   // depthFailOp
+        VK_COMPARE_OP_NEVER,  // compareOp
+        0,                    // compareMask
+        0,                    // writeMask
+        0                     // reference
+    };
+
+    static const VkPipelineDepthStencilStateCreateInfo stencilInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,  // sType
+        nullptr,                                                     // pNext
+        0,                                                           // flags
+        VK_FALSE,                                                    // depthTestEnable
+        VK_FALSE,                                                    // depthWriteEnable
+        VK_COMPARE_OP_ALWAYS,                                        // depthCompareOp
+        VK_FALSE,                                                    // depthBoundsTestEnable
+        VK_FALSE,                                                    // stencilTestEnable
+        dummyStencilState,                                           // front
+        dummyStencilState,                                           // back
+        0.0f,                                                        // minDepthBounds
+        1.0f                                                         // maxDepthBounds
+    };
+
+    static const VkPipelineViewportStateCreateInfo viewportInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // sType
+        nullptr,                                                // pNext
+        0,                                                      // flags
+        1,                                                      // viewportCount
+        nullptr,                                                // pViewports
+        1,                                                      // scissorCount
+        nullptr                                                 // pScissors
+    };
+
+    static const VkPipelineColorBlendAttachmentState attachmentState = {
+        VK_FALSE,                                             // blendEnable
+        VK_BLEND_FACTOR_ONE,                                  // srcColorBlendFactor
+        VK_BLEND_FACTOR_ZERO,                                 // dstColorBlendFactor
+        VK_BLEND_OP_ADD,                                      // colorBlendOp
+        VK_BLEND_FACTOR_ONE,                                  // srcAlphaBlendFactor
+        VK_BLEND_FACTOR_ZERO,                                 // dstAlphaBlendFactor
+        VK_BLEND_OP_ADD,                                      // alphaBlendOp
+        VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | // colorWriteMask
+        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT   // colorWriteMask
+    };
+
+    static const VkPipelineColorBlendStateCreateInfo colorBlendInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,  // sType
+        nullptr,                                                   // pNext
+        0,                                                         // flags
+        VK_FALSE,                                                  // logicOpEnable
+        VK_LOGIC_OP_CLEAR,                                         // logicOp
+        1,                                                         // attachmentCount
+        &attachmentState,                                          // pAttachments
+        { 0.f, 0.f, 0.f, 0.f }                                       // blendConstants[4]
+    };
+
+    static const VkPipelineRasterizationStateCreateInfo rasterInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,  // sType
+        nullptr,                                                     // pNext
+        0,                                                           // flags
+        VK_FALSE,                                                    // depthClampEnable
+        VK_FALSE,                                                    // rasterizerDiscardEnabled
+        VK_POLYGON_MODE_FILL,                                        // polygonMode
+        VK_CULL_MODE_NONE,                                           // cullMode
+        VK_FRONT_FACE_COUNTER_CLOCKWISE,                             // frontFace
+        VK_FALSE,                                                    // depthBiasEnable
+        0.0f,                                                        // depthBiasConstantFactor
+        0.0f,                                                        // depthBiasClamp
+        0.0f,                                                        // depthBiasSlopeFactor
+        1.0f                                                         // lineWidth
+    };
+    // Viewport and scissor are dynamic state, so viewportInfo above leaves their pointers null.
+    static const VkDynamicState dynamicStates[2] = { VK_DYNAMIC_STATE_VIEWPORT,
+        VK_DYNAMIC_STATE_SCISSOR };
+    static const VkPipelineDynamicStateCreateInfo dynamicInfo = {
+        VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,  // sType
+        nullptr,                                               // pNext
+        0,                                                     // flags
+        2,                                                     // dynamicStateCount
+        dynamicStates                                          // pDynamicStates
+    };
+
+    VkPipelineMultisampleStateCreateInfo multisampleInfo;
+    setup_multisample_state(numSamples, &multisampleInfo);
+
+    VkGraphicsPipelineCreateInfo pipelineCreateInfo;
+    memset(&pipelineCreateInfo, 0, sizeof(VkGraphicsPipelineCreateInfo));
+    pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+    pipelineCreateInfo.pNext = nullptr;
+    pipelineCreateInfo.flags = 0;
+    pipelineCreateInfo.stageCount = 2;
+    pipelineCreateInfo.pStages = shaderStageInfo;
+    pipelineCreateInfo.pVertexInputState = &vertexInputInfo;
+    pipelineCreateInfo.pInputAssemblyState = &inputAssemblyInfo;
+    pipelineCreateInfo.pTessellationState = nullptr;
+    pipelineCreateInfo.pViewportState = &viewportInfo;
+    pipelineCreateInfo.pRasterizationState = &rasterInfo;
+    pipelineCreateInfo.pMultisampleState = &multisampleInfo;
+    pipelineCreateInfo.pDepthStencilState = &stencilInfo;
+    pipelineCreateInfo.pColorBlendState = &colorBlendInfo;
+    pipelineCreateInfo.pDynamicState = &dynamicInfo;
+    pipelineCreateInfo.layout = pipelineLayout;
+    pipelineCreateInfo.renderPass = renderPass.vkRenderPass();
+    pipelineCreateInfo.subpass = 0;
+    pipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE;
+    pipelineCreateInfo.basePipelineIndex = -1;
+
+    VkPipeline vkPipeline;
+    VkResult err;
+    {
+#if defined(SK_ENABLE_SCOPED_LSAN_SUPPRESSIONS)
+        // skia:8712
+        __lsan::ScopedDisabler lsanDisabler;
+#endif
+        err = GR_VK_CALL(gpu->vkInterface(), CreateGraphicsPipelines(gpu->device(),
+                                                                     cache, 1,
+                                                                     &pipelineCreateInfo,
+                                                                     nullptr, &vkPipeline));
+    }
+    if (err) {
+        SkDebugf("Failed to create copy pipeline. Error: %d\n", err);
+        return nullptr;
+    }
+
+    return new GrVkCopyPipeline(vkPipeline, &renderPass);  // keeps a pointer to renderPass
+}
+
+bool GrVkCopyPipeline::isCompatible(const GrVkRenderPass& rp) const {
+    return rp.isCompatible(*fRenderPass);  // rp vs. the pass this pipeline was created with
+}
diff --git a/src/gpu/vk/GrVkCopyPipeline.h b/src/gpu/vk/GrVkCopyPipeline.h
new file mode 100644
index 0000000..9b8d2d2
--- /dev/null
+++ b/src/gpu/vk/GrVkCopyPipeline.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrVkCopyPipeline_DEFINED
+#define GrVkCopyPipeline_DEFINED
+
+#include "src/gpu/vk/GrVkPipeline.h"
+
+class GrVkCopyPipeline : public GrVkPipeline {
+public:
+    // The passed-in renderPass is kept by pointer, so we expect it to be stored on the
+    // GrVkResourceProvider rather than being a local object of the client.
+    static GrVkCopyPipeline* Create(GrVkGpu* gpu,
+                                    VkPipelineShaderStageCreateInfo* shaderStageInfo,
+                                    VkPipelineLayout pipelineLayout,
+                                    int numSamples,
+                                    const GrVkRenderPass& renderPass,
+                                    VkPipelineCache cache);
+
+    bool isCompatible(const GrVkRenderPass& rp) const;
+
+#ifdef SK_TRACE_VK_RESOURCES
+    void dumpInfo() const override {
+        SkDebugf("GrVkCopyPipeline: %d (%d refs)\n", fPipeline, this->getRefCnt());
+    }
+#endif
+
+private:
+    GrVkCopyPipeline(VkPipeline pipeline, const GrVkRenderPass* renderPass)
+        : INHERITED(pipeline)
+        , fRenderPass(renderPass) {
+    }
+
+    const GrVkRenderPass* fRenderPass;
+
+    typedef GrVkPipeline INHERITED;
+};
+
+#endif
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 8a5f979..9b4d019 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -262,6 +262,9 @@
     }
     fSemaphoresToSignal.reset();
 
+
+    fCopyManager.destroyResources(this);
+
     // must call this just before we destroy the command pool and VkDevice
     fResourceProvider.destroyResources(VK_ERROR_DEVICE_LOST == res);
 
@@ -296,6 +299,7 @@
             for (int i = 0; i < fSemaphoresToSignal.count(); ++i) {
                 fSemaphoresToSignal[i]->unrefAndAbandon();
             }
+            fCopyManager.abandonResources();
 
             // must call this just before we destroy the command pool and VkDevice
             fResourceProvider.abandonResources();
@@ -781,11 +785,6 @@
     // R8G8B8A8_UNORM image and then copy it.
     sk_sp<GrVkTexture> copyTexture;
     if (dataColorType == GrColorType::kRGB_888x && tex->imageFormat() == VK_FORMAT_R8G8B8_UNORM) {
-        bool dstHasYcbcr = tex->ycbcrConversionInfo().isValid();
-        if (!this->vkCaps().canCopyAsBlit(tex->config(), 1, false, dstHasYcbcr,
-                                          kRGBA_8888_GrPixelConfig, 1, false, false)) {
-            return false;
-        }
         GrSurfaceDesc surfDesc;
         surfDesc.fFlags = kRenderTarget_GrSurfaceFlag;
         surfDesc.fWidth = width;
@@ -814,6 +813,16 @@
             return false;
         }
 
+        bool dstHasYcbcr = tex->ycbcrConversionInfo().isValid();
+        if (!this->vkCaps().canCopyAsBlit(tex->config(), 1, false, dstHasYcbcr,
+                                          copyTexture->config(), 1, false,
+                                          false) &&
+            !this->vkCaps().canCopyAsDraw(tex->config(), SkToBool(tex->asRenderTarget()),
+                                          dstHasYcbcr,
+                                          copyTexture->config(), true, false)) {
+            return false;
+        }
+
         uploadTexture = copyTexture.get();
         uploadLeft = 0;
         uploadTop = 0;
@@ -874,8 +883,10 @@
     // now.
     if (copyTexture.get()) {
         SkASSERT(dataColorType == GrColorType::kRGB_888x);
-        SkAssertResult(this->copySurface(tex, copyTexture.get(), SkIRect::MakeWH(width, height),
-                                         SkIPoint::Make(left, top), false));
+        static const GrSurfaceOrigin kOrigin = kTopLeft_GrSurfaceOrigin;
+        SkAssertResult(this->copySurface(tex, kOrigin, copyTexture.get(), kOrigin,
+                                         SkIRect::MakeWH(width, height), SkIPoint::Make(left, top),
+                                         false));
     }
     if (1 == mipLevelCount) {
         tex->texturePriv().markMipMapsDirty();
@@ -2057,16 +2068,19 @@
     return 0;
 }
 
-void GrVkGpu::copySurfaceAsCopyImage(GrSurface* dst, GrSurface* src, GrVkImage* dstImage,
-                                     GrVkImage* srcImage, const SkIRect& srcRect,
+void GrVkGpu::copySurfaceAsCopyImage(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                     GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                     GrVkImage* dstImage,
+                                     GrVkImage* srcImage,
+                                     const SkIRect& srcRect,
                                      const SkIPoint& dstPoint) {
 #ifdef SK_DEBUG
     int dstSampleCnt = get_surface_sample_cnt(dst);
     int srcSampleCnt = get_surface_sample_cnt(src);
     bool dstHasYcbcr = dstImage->ycbcrConversionInfo().isValid();
     bool srcHasYcbcr = srcImage->ycbcrConversionInfo().isValid();
-    SkASSERT(this->vkCaps().canCopyImage(dst->config(), dstSampleCnt, dstHasYcbcr,
-                                         src->config(), srcSampleCnt, srcHasYcbcr));
+    SkASSERT(this->vkCaps().canCopyImage(dst->config(), dstSampleCnt, dstOrigin, dstHasYcbcr,
+                                         src->config(), srcSampleCnt, srcOrigin, srcHasYcbcr));
 
 #endif
 
@@ -2084,13 +2098,24 @@
                              VK_PIPELINE_STAGE_TRANSFER_BIT,
                              false);
 
+    // Flip rect if necessary
+    SkIRect srcVkRect = srcRect;
+    int32_t dstY = dstPoint.fY;
+
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        SkASSERT(kBottomLeft_GrSurfaceOrigin == dstOrigin);
+        srcVkRect.fTop = src->height() - srcRect.fBottom;
+        srcVkRect.fBottom =  src->height() - srcRect.fTop;
+        dstY = dst->height() - dstPoint.fY - srcVkRect.height();
+    }
+
     VkImageCopy copyRegion;
     memset(&copyRegion, 0, sizeof(VkImageCopy));
     copyRegion.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
-    copyRegion.srcOffset = { srcRect.fLeft, srcRect.fTop, 0 };
+    copyRegion.srcOffset = { srcVkRect.fLeft, srcVkRect.fTop, 0 };
     copyRegion.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
-    copyRegion.dstOffset = { dstPoint.fX, dstPoint.fY, 0 };
-    copyRegion.extent = { (uint32_t)srcRect.width(), (uint32_t)srcRect.height(), 1 };
+    copyRegion.dstOffset = { dstPoint.fX, dstY, 0 };
+    copyRegion.extent = { (uint32_t)srcVkRect.width(), (uint32_t)srcVkRect.height(), 1 };
 
     fCurrentCmdBuffer->copyImage(this,
                                  srcImage,
@@ -2102,12 +2127,14 @@
 
     SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY,
                                         srcRect.width(), srcRect.height());
-    // The rect is already in device space so we pass in kTopLeft so no flip is done.
-    this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);
 }
 
-void GrVkGpu::copySurfaceAsBlit(GrSurface* dst, GrSurface* src, GrVkImage* dstImage,
-                                GrVkImage* srcImage, const SkIRect& srcRect,
+void GrVkGpu::copySurfaceAsBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                GrVkImage* dstImage,
+                                GrVkImage* srcImage,
+                                const SkIRect& srcRect,
                                 const SkIPoint& dstPoint) {
 #ifdef SK_DEBUG
     int dstSampleCnt = get_surface_sample_cnt(dst);
@@ -2132,14 +2159,40 @@
                              false);
 
     // Flip rect if necessary
-    SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY, srcRect.width(),
-                                        srcRect.height());
+    SkIRect srcVkRect;
+    srcVkRect.fLeft = srcRect.fLeft;
+    srcVkRect.fRight = srcRect.fRight;
+    SkIRect dstRect;
+    dstRect.fLeft = dstPoint.fX;
+    dstRect.fRight = dstPoint.fX + srcRect.width();
+
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        srcVkRect.fTop = src->height() - srcRect.fBottom;
+        srcVkRect.fBottom = src->height() - srcRect.fTop;
+    } else {
+        srcVkRect.fTop = srcRect.fTop;
+        srcVkRect.fBottom = srcRect.fBottom;
+    }
+
+    if (kBottomLeft_GrSurfaceOrigin == dstOrigin) {
+        dstRect.fTop = dst->height() - dstPoint.fY - srcVkRect.height();
+    } else {
+        dstRect.fTop = dstPoint.fY;
+    }
+    dstRect.fBottom = dstRect.fTop + srcVkRect.height();
+
+    // If we have different origins, we need to flip the top and bottom of the dst rect so that we
+    // get the correct orientation of the copied data.
+    if (srcOrigin != dstOrigin) {
+        using std::swap;
+        swap(dstRect.fTop, dstRect.fBottom);
+    }
 
     VkImageBlit blitRegion;
     memset(&blitRegion, 0, sizeof(VkImageBlit));
     blitRegion.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
-    blitRegion.srcOffsets[0] = { srcRect.fLeft, srcRect.fTop, 0 };
-    blitRegion.srcOffsets[1] = { srcRect.fRight, srcRect.fBottom, 1 };
+    blitRegion.srcOffsets[0] = { srcVkRect.fLeft, srcVkRect.fTop, 0 };
+    blitRegion.srcOffsets[1] = { srcVkRect.fRight, srcVkRect.fBottom, 1 };
     blitRegion.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
     blitRegion.dstOffsets[0] = { dstRect.fLeft, dstRect.fTop, 0 };
     blitRegion.dstOffsets[1] = { dstRect.fRight, dstRect.fBottom, 1 };
@@ -2151,22 +2204,32 @@
                                  &blitRegion,
                                  VK_FILTER_NEAREST); // We never scale so any filter works here
 
-    // The rect is already in device space so we pass in kTopLeft so no flip is done.
-    this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+    dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY, srcRect.width(), srcRect.height());
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);
 }
 
-void GrVkGpu::copySurfaceAsResolve(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                                   const SkIPoint& dstPoint) {
+void GrVkGpu::copySurfaceAsResolve(GrSurface* dst, GrSurfaceOrigin dstOrigin, GrSurface* src,
+                                   GrSurfaceOrigin srcOrigin, const SkIRect& origSrcRect,
+                                   const SkIPoint& origDstPoint) {
     GrVkRenderTarget* srcRT = static_cast<GrVkRenderTarget*>(src->asRenderTarget());
+    SkIRect srcRect = origSrcRect;
+    SkIPoint dstPoint = origDstPoint;
+    if (kBottomLeft_GrSurfaceOrigin == srcOrigin) {
+        SkASSERT(kBottomLeft_GrSurfaceOrigin == dstOrigin);
+        srcRect = {origSrcRect.fLeft, src->height() - origSrcRect.fBottom,
+                   origSrcRect.fRight, src->height() - origSrcRect.fTop};
+        dstPoint.fY = dst->height() - dstPoint.fY - srcRect.height();
+    }
     this->resolveImage(dst, srcRT, srcRect, dstPoint);
-    SkIRect dstRect = SkIRect::MakeXYWH(dstPoint.fX, dstPoint.fY,
+    SkIRect dstRect = SkIRect::MakeXYWH(origDstPoint.fX, origDstPoint.fY,
                                         srcRect.width(), srcRect.height());
-    // The rect is already in device space so we pass in kTopLeft so no flip is done.
-    this->didWriteToSurface(dst, kTopLeft_GrSurfaceOrigin, &dstRect);
+    this->didWriteToSurface(dst, dstOrigin, &dstRect);
 }
 
-bool GrVkGpu::onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
-                            const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) {
+bool GrVkGpu::onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                            GrSurface* src, GrSurfaceOrigin srcOrigin,
+                            const SkIRect& srcRect, const SkIPoint& dstPoint,
+                            bool canDiscardOutsideDstRect) {
 #ifdef SK_DEBUG
     if (GrVkRenderTarget* srcRT = static_cast<GrVkRenderTarget*>(src->asRenderTarget())) {
         SkASSERT(!srcRT->wrapsSecondaryCommandBuffer());
@@ -2207,22 +2270,33 @@
     bool dstHasYcbcr = dstImage->ycbcrConversionInfo().isValid();
     bool srcHasYcbcr = srcImage->ycbcrConversionInfo().isValid();
 
-    if (this->vkCaps().canCopyAsResolve(dstConfig, dstSampleCnt, dstHasYcbcr,
-                                        srcConfig, srcSampleCnt, srcHasYcbcr)) {
-        this->copySurfaceAsResolve(dst, src, srcRect, dstPoint);
+    if (this->vkCaps().canCopyAsResolve(dstConfig, dstSampleCnt, dstOrigin, dstHasYcbcr,
+                                        srcConfig, srcSampleCnt, srcOrigin, srcHasYcbcr)) {
+        this->copySurfaceAsResolve(dst, dstOrigin, src, srcOrigin, srcRect, dstPoint);
         return true;
     }
 
-    if (this->vkCaps().canCopyImage(dstConfig, dstSampleCnt, dstHasYcbcr,
-                                    srcConfig, srcSampleCnt, srcHasYcbcr)) {
-        this->copySurfaceAsCopyImage(dst, src, dstImage, srcImage, srcRect, dstPoint);
+    if (this->vkCaps().canCopyAsDraw(dstConfig, SkToBool(dst->asRenderTarget()), dstHasYcbcr,
+                                     srcConfig, SkToBool(src->asTexture()), srcHasYcbcr)) {
+        SkAssertResult(fCopyManager.copySurfaceAsDraw(this, dst, dstOrigin, src, srcOrigin, srcRect,
+                                                      dstPoint, canDiscardOutsideDstRect));
+        auto dstRect = srcRect.makeOffset(dstPoint.fX, dstPoint.fY);
+        this->didWriteToSurface(dst, dstOrigin, &dstRect);
+        return true;
+    }
+
+    if (this->vkCaps().canCopyImage(dstConfig, dstSampleCnt, dstOrigin, dstHasYcbcr,
+                                    srcConfig, srcSampleCnt, srcOrigin, srcHasYcbcr)) {
+        this->copySurfaceAsCopyImage(dst, dstOrigin, src, srcOrigin, dstImage, srcImage,
+                                     srcRect, dstPoint);
         return true;
     }
 
     if (this->vkCaps().canCopyAsBlit(dstConfig, dstSampleCnt, dstImage->isLinearTiled(),
                                      dstHasYcbcr, srcConfig, srcSampleCnt,
                                      srcImage->isLinearTiled(), srcHasYcbcr)) {
-        this->copySurfaceAsBlit(dst, src, dstImage, srcImage, srcRect, dstPoint);
+        this->copySurfaceAsBlit(dst, dstOrigin, src, srcOrigin, dstImage, srcImage,
+                                srcRect, dstPoint);
         return true;
     }
 
@@ -2272,17 +2346,6 @@
     if (dstColorType == GrColorType::kRGB_888x && image->imageFormat() == VK_FORMAT_R8G8B8_UNORM) {
         SkASSERT(surface->config() == kRGB_888_GrPixelConfig);
 
-        int srcSampleCount = 0;
-        if (rt) {
-            srcSampleCount = rt->numColorSamples();
-        }
-        bool srcHasYcbcr = image->ycbcrConversionInfo().isValid();
-        if (!this->vkCaps().canCopyAsBlit(kRGBA_8888_GrPixelConfig, 1, false, false,
-                                          surface->config(), srcSampleCount, image->isLinearTiled(),
-                                          srcHasYcbcr)) {
-            return false;
-        }
-
         // Make a new surface that is RGBA to copy the RGB surface into.
         GrSurfaceDesc surfDesc;
         surfDesc.fFlags = kRenderTarget_GrSurfaceFlag;
@@ -2313,9 +2376,25 @@
             return false;
         }
 
+        int srcSampleCount = 0;
+        if (rt) {
+            srcSampleCount = rt->numColorSamples();
+        }
+        bool srcHasYcbcr = image->ycbcrConversionInfo().isValid();
+        if (!this->vkCaps().canCopyAsBlit(copySurface->config(), 1, false, false,
+                                          surface->config(), srcSampleCount, image->isLinearTiled(),
+                                          srcHasYcbcr) &&
+            !this->vkCaps().canCopyAsDraw(copySurface->config(), false, false,
+                                          surface->config(), SkToBool(surface->asTexture()),
+                                          srcHasYcbcr)) {
+            return false;
+        }
         SkIRect srcRect = SkIRect::MakeXYWH(left, top, width, height);
-        SkAssertResult(this->copySurface(copySurface.get(), surface, srcRect, SkIPoint::Make(0,0)));
-
+        static const GrSurfaceOrigin kOrigin = kTopLeft_GrSurfaceOrigin;
+        if (!this->copySurface(copySurface.get(), kOrigin, surface, kOrigin,
+                               srcRect, SkIPoint::Make(0,0))) {
+            return false;
+        }
         top = 0;
         left = 0;
         dstColorType = GrColorType::kRGBA_8888;
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index d1b2d98..820d801 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -12,6 +12,7 @@
 #include "include/gpu/vk/GrVkTypes.h"
 #include "src/gpu/GrGpu.h"
 #include "src/gpu/vk/GrVkCaps.h"
+#include "src/gpu/vk/GrVkCopyManager.h"
 #include "src/gpu/vk/GrVkIndexBuffer.h"
 #include "src/gpu/vk/GrVkMemory.h"
 #include "src/gpu/vk/GrVkResourceProvider.h"
@@ -221,7 +222,8 @@
     bool onTransferPixelsFrom(GrSurface* surface, int left, int top, int width, int height,
                               GrColorType, GrGpuBuffer* transferBuffer, size_t offset) override;
 
-    bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
+    bool onCopySurface(GrSurface* dst, GrSurfaceOrigin dstOrigin, GrSurface* src,
+                       GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
                        const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override;
 
     void onFinishFlush(GrSurfaceProxy*[], int, SkSurface::BackendSurfaceAccess access,
@@ -238,14 +240,21 @@
 
     void internalResolveRenderTarget(GrRenderTarget*, bool requiresSubmit);
 
-    void copySurfaceAsCopyImage(GrSurface* dst, GrSurface* src, GrVkImage* dstImage,
-                                GrVkImage* srcImage, const SkIRect& srcRect,
+    void copySurfaceAsCopyImage(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                                GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                GrVkImage* dstImage, GrVkImage* srcImage,
+                                const SkIRect& srcRect,
                                 const SkIPoint& dstPoint);
 
-    void copySurfaceAsBlit(GrSurface* dst, GrSurface* src, GrVkImage* dstImage, GrVkImage* srcImage,
-                           const SkIRect& srcRect, const SkIPoint& dstPoint);
+    void copySurfaceAsBlit(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                           GrSurface* src, GrSurfaceOrigin srcOrigin,
+                           GrVkImage* dstImage, GrVkImage* srcImage,
+                           const SkIRect& srcRect,
+                           const SkIPoint& dstPoint);
 
-    void copySurfaceAsResolve(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
+    void copySurfaceAsResolve(GrSurface* dst, GrSurfaceOrigin dstOrigin,
+                              GrSurface* src, GrSurfaceOrigin srcOrigin,
+                              const SkIRect& srcRect,
                               const SkIPoint& dstPoint);
 
     // helpers for onCreateTexture and writeTexturePixels
@@ -289,6 +298,8 @@
     VkPhysicalDeviceProperties                            fPhysDevProps;
     VkPhysicalDeviceMemoryProperties                      fPhysDevMemProps;
 
+    GrVkCopyManager                                       fCopyManager;
+
     // compiler used for compiling sksl into spirv. We only want to create the compiler once since
     // there is significant overhead to the first compile of any compiler.
     SkSL::Compiler*                                       fCompiler;
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index 726095d..c95f1e5 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -46,19 +46,23 @@
 
 class Copy : public GrVkPrimaryCommandBufferTask {
 public:
-    Copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint, bool shouldDiscardDst)
+    Copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+         const SkIPoint& dstPoint, bool shouldDiscardDst)
             : fSrc(src)
+            , fSrcOrigin(srcOrigin)
             , fSrcRect(srcRect)
             , fDstPoint(dstPoint)
             , fShouldDiscardDst(shouldDiscardDst) {}
 
     void execute(const Args& args) override {
-        args.fGpu->copySurface(args.fSurface, fSrc.get(), fSrcRect, fDstPoint, fShouldDiscardDst);
+        args.fGpu->copySurface(args.fSurface, args.fOrigin, fSrc.get(), fSrcOrigin, fSrcRect,
+                               fDstPoint, fShouldDiscardDst);
     }
 
 private:
     using Src = GrPendingIOResource<GrSurface, kRead_GrIOType>;
     Src fSrc;
+    GrSurfaceOrigin fSrcOrigin;
     SkIRect fSrcRect;
     SkIPoint fDstPoint;
     bool fShouldDiscardDst;
@@ -90,9 +94,9 @@
 
 /////////////////////////////////////////////////////////////////////////////
 
-void GrVkGpuTextureCommandBuffer::copy(GrSurface* src, const SkIRect& srcRect,
-                                       const SkIPoint& dstPoint) {
-    fTasks.emplace<Copy>(src, srcRect, dstPoint, false);
+void GrVkGpuTextureCommandBuffer::copy(GrSurface* src, GrSurfaceOrigin srcOrigin,
+                                       const SkIRect& srcRect, const SkIPoint& dstPoint) {
+    fTasks.emplace<Copy>(src, srcOrigin, srcRect, dstPoint, false);
 }
 
 void GrVkGpuTextureCommandBuffer::transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
@@ -105,7 +109,7 @@
 }
 
 void GrVkGpuTextureCommandBuffer::submit() {
-    GrVkPrimaryCommandBufferTask::Args taskArgs{fGpu, fTexture};
+    GrVkPrimaryCommandBufferTask::Args taskArgs{fGpu, fTexture, fOrigin};
     for (auto& task : fTasks) {
         task.execute(taskArgs);
     }
@@ -227,7 +231,7 @@
     GrStencilAttachment* stencil = fRenderTarget->renderTargetPriv().getStencilAttachment();
     auto currPreCmd = fPreCommandBufferTasks.begin();
 
-    GrVkPrimaryCommandBufferTask::Args taskArgs{fGpu, fRenderTarget};
+    GrVkPrimaryCommandBufferTask::Args taskArgs{fGpu, fRenderTarget, fOrigin};
     for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
         CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
 
@@ -600,7 +604,7 @@
     ++fCommandBufferInfos[fCurrentCmdInfo].fNumPreCmds;
 }
 
-void GrVkGpuRTCommandBuffer::copy(GrSurface* src, const SkIRect& srcRect,
+void GrVkGpuRTCommandBuffer::copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
                                   const SkIPoint& dstPoint) {
     CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
     if (!cbInfo.fIsEmpty || LoadStoreState::kStartsWithClear == cbInfo.fLoadStoreState) {
@@ -608,7 +612,8 @@
     }
 
     fPreCommandBufferTasks.emplace<Copy>(
-            src, srcRect, dstPoint, LoadStoreState::kStartsWithDiscard == cbInfo.fLoadStoreState);
+            src, srcOrigin, srcRect, dstPoint,
+            LoadStoreState::kStartsWithDiscard == cbInfo.fLoadStoreState);
     ++fCommandBufferInfos[fCurrentCmdInfo].fNumPreCmds;
 
     if (LoadStoreState::kLoadAndStore != cbInfo.fLoadStoreState) {
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h
index 395ae24..76c1284 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.h
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.h
@@ -31,6 +31,7 @@
     struct Args {
         GrGpu* fGpu;
         GrSurface* fSurface;
+        GrSurfaceOrigin fOrigin;
     };
 
     virtual void execute(const Args& args) = 0;
@@ -45,7 +46,8 @@
 public:
     GrVkGpuTextureCommandBuffer(GrVkGpu* gpu) : fGpu(gpu) {}
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override;
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override;
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override;
 
@@ -79,7 +81,8 @@
 
     void inlineUpload(GrOpFlushState* state, GrDeferredTextureUploadFn& upload) override;
 
-    void copy(GrSurface* src, const SkIRect& srcRect, const SkIPoint& dstPoint) override;
+    void copy(GrSurface* src, GrSurfaceOrigin srcOrigin, const SkIRect& srcRect,
+              const SkIPoint& dstPoint) override;
     void transferFrom(const SkIRect& srcRect, GrColorType bufferColorType,
                       GrGpuBuffer* transferBuffer, size_t offset) override;
 
diff --git a/src/gpu/vk/GrVkResourceProvider.cpp b/src/gpu/vk/GrVkResourceProvider.cpp
index 81cd3a6..87f653e 100644
--- a/src/gpu/vk/GrVkResourceProvider.cpp
+++ b/src/gpu/vk/GrVkResourceProvider.cpp
@@ -12,6 +12,7 @@
 #include "src/gpu/GrContextPriv.h"
 #include "src/gpu/vk/GrVkCommandBuffer.h"
 #include "src/gpu/vk/GrVkCommandPool.h"
+#include "src/gpu/vk/GrVkCopyPipeline.h"
 #include "src/gpu/vk/GrVkGpu.h"
 #include "src/gpu/vk/GrVkPipeline.h"
 #include "src/gpu/vk/GrVkRenderTarget.h"
@@ -106,6 +107,33 @@
             shaderStageCount, primitiveType, compatibleRenderPass, layout, this->pipelineCache());
 }
 
+GrVkCopyPipeline* GrVkResourceProvider::findOrCreateCopyPipeline(
+        const GrVkRenderTarget* dst,
+        VkPipelineShaderStageCreateInfo* shaderStageInfo,
+        VkPipelineLayout pipelineLayout) {
+    // Find or Create a compatible pipeline
+    GrVkCopyPipeline* pipeline = nullptr;
+    for (int i = 0; i < fCopyPipelines.count() && !pipeline; ++i) {
+        if (fCopyPipelines[i]->isCompatible(*dst->simpleRenderPass())) {
+            pipeline = fCopyPipelines[i];
+        }
+    }
+    if (!pipeline) {
+        pipeline = GrVkCopyPipeline::Create(fGpu, shaderStageInfo,
+                                            pipelineLayout,
+                                            dst->numColorSamples(),
+                                            *dst->simpleRenderPass(),
+                                            this->pipelineCache());
+        if (!pipeline) {
+            return nullptr;
+        }
+        fCopyPipelines.push_back(pipeline);
+    }
+    SkASSERT(pipeline);
+    pipeline->ref();
+    return pipeline;
+}
+
 // To create framebuffers, we first need to create a simple RenderPass that is
 // only used for framebuffer creation. When we actually render we will create
 // RenderPasses as needed that are compatible with the framebuffer.
@@ -376,6 +404,11 @@
         taskGroup->wait();
     }
 
+    // Release all copy pipelines
+    for (int i = 0; i < fCopyPipelines.count(); ++i) {
+        fCopyPipelines[i]->unref(fGpu);
+    }
+
     // loop over all render pass sets to make sure we destroy all the internal VkRenderPasses
     for (int i = 0; i < fRenderPassArray.count(); ++i) {
         fRenderPassArray[i].releaseResources(fGpu);
@@ -444,6 +477,11 @@
     }
     fAvailableCommandPools.reset();
 
+    // Abandon all copy pipelines
+    for (int i = 0; i < fCopyPipelines.count(); ++i) {
+        fCopyPipelines[i]->unrefAndAbandon();
+    }
+
     // loop over all render pass sets to make sure we destroy all the internal VkRenderPasses
     for (int i = 0; i < fRenderPassArray.count(); ++i) {
         fRenderPassArray[i].abandonResources();
diff --git a/src/gpu/vk/GrVkResourceProvider.h b/src/gpu/vk/GrVkResourceProvider.h
index 649d461..3df1a4d 100644
--- a/src/gpu/vk/GrVkResourceProvider.h
+++ b/src/gpu/vk/GrVkResourceProvider.h
@@ -27,6 +27,7 @@
 #include <thread>
 
 class GrVkCommandPool;
+class GrVkCopyPipeline;
 class GrVkGpu;
 class GrVkPipeline;
 class GrVkPipelineState;
@@ -54,6 +55,10 @@
                                  VkRenderPass compatibleRenderPass,
                                  VkPipelineLayout layout);
 
+    GrVkCopyPipeline* findOrCreateCopyPipeline(const GrVkRenderTarget* dst,
+                                               VkPipelineShaderStageCreateInfo*,
+                                               VkPipelineLayout);
+
     GR_DEFINE_RESOURCE_HANDLE_CLASS(CompatibleRPHandle);
 
     // Finds or creates a simple render pass that matches the target, increments the refcount,
@@ -264,6 +269,9 @@
     // Central cache for creating pipelines
     VkPipelineCache fPipelineCache;
 
+    // Cache of previously created copy pipelines
+    SkTArray<GrVkCopyPipeline*> fCopyPipelines;
+
     SkSTArray<4, CompatibleRenderPassSet> fRenderPassArray;
 
     SkTArray<const GrVkRenderPass*> fExternalRenderPasses;
diff --git a/src/image/SkImage_GpuBase.cpp b/src/image/SkImage_GpuBase.cpp
index 133e491..6cfc3e4 100644
--- a/src/image/SkImage_GpuBase.cpp
+++ b/src/image/SkImage_GpuBase.cpp
@@ -112,17 +112,31 @@
 
     sk_sp<GrSurfaceProxy> proxy = this->asTextureProxyRef(context);
 
-    sk_sp<GrTextureProxy> copyProxy = GrSurfaceProxy::Copy(
-            context, proxy.get(), GrMipMapped::kNo, subset, SkBackingFit::kExact,
-            proxy->isBudgeted());
+    GrSurfaceDesc desc;
+    desc.fWidth = subset.width();
+    desc.fHeight = subset.height();
+    desc.fConfig = proxy->config();
 
-    if (!copyProxy) {
+    GrBackendFormat format = proxy->backendFormat().makeTexture2D();
+    if (!format.isValid()) {
+        return nullptr;
+    }
+
+    // TODO: Should this inherit our proxy's budgeted status?
+    sk_sp<GrSurfaceContext> sContext(context->priv().makeDeferredSurfaceContext(
+            format, desc, proxy->origin(), GrMipMapped::kNo, SkBackingFit::kExact,
+            proxy->isBudgeted()));
+    if (!sContext) {
+        return nullptr;
+    }
+
+    if (!sContext->copy(proxy.get(), subset, SkIPoint::Make(0, 0))) {
         return nullptr;
     }
 
     // MDB: this call is okay bc we know 'sContext' was kExact
     return sk_make_sp<SkImage_Gpu>(fContext, kNeedNewImageUniqueID, this->alphaType(),
-                                   std::move(copyProxy), this->refColorSpace());
+                                   sContext->asTextureProxyRef(), this->refColorSpace());
 }
 
 static void apply_premul(const SkImageInfo& info, void* pixels, size_t rowBytes) {