Direct3D: implement transfer methods

Change-Id: Id41287668971d464e517e28757736d7d3b019666
Bug: skia:11977
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/406356
Commit-Queue: Jim Van Verth <jvanverth@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/GrCaps.h b/src/gpu/GrCaps.h
index 33bbdf2..986dd97 100644
--- a/src/gpu/GrCaps.h
+++ b/src/gpu/GrCaps.h
@@ -210,6 +210,8 @@
 
     uint32_t maxPushConstantsSize() const { return fMaxPushConstantsSize; }
 
+    size_t transferBufferAlignment() const { return fTransferBufferAlignment; }
+
     virtual bool isFormatSRGB(const GrBackendFormat&) const = 0;
 
     bool isFormatCompressed(const GrBackendFormat& format) const;
@@ -569,6 +571,7 @@
     int fMaxWindowRectangles;
     int fInternalMultisampleCount;
     uint32_t fMaxPushConstantsSize = 0;
+    size_t fTransferBufferAlignment = 1;
 
     GrDriverBugWorkarounds fDriverBugWorkarounds;
 
diff --git a/src/gpu/GrSurfaceContext.cpp b/src/gpu/GrSurfaceContext.cpp
index 92dc3b4..94682af 100644
--- a/src/gpu/GrSurfaceContext.cpp
+++ b/src/gpu/GrSurfaceContext.cpp
@@ -838,6 +838,7 @@
         ReadPixelsContext fClientContext;
         SkISize fSize;
         SkColorType fColorType;
+        size_t fBufferAlignment;
         GrClientMappedBufferManager* fMappedBufferManager;
         PixelTransferResult fTransferResult;
     };
@@ -848,13 +849,16 @@
                                             callbackContext,
                                             rect.size(),
                                             colorType,
+                                            this->caps()->transferBufferAlignment(),
                                             mappedBufferManager,
                                             std::move(transferResult)};
     auto finishCallback = [](GrGpuFinishedContext c) {
         const auto* context = reinterpret_cast<const FinishContext*>(c);
         auto manager = context->fMappedBufferManager;
         auto result = std::make_unique<AsyncReadResult>(manager->owningDirectContext());
-        size_t rowBytes = context->fSize.width() * SkColorTypeBytesPerPixel(context->fColorType);
+        size_t rowBytes =
+                GrAlignTo(context->fSize.width() * SkColorTypeBytesPerPixel(context->fColorType),
+                          context->fBufferAlignment);
         if (!result->addTransferResult(context->fTransferResult, context->fSize, rowBytes,
                                        manager)) {
             result.reset();
@@ -1057,6 +1061,7 @@
         ReadPixelsContext fClientContext;
         GrClientMappedBufferManager* fMappedBufferManager;
         SkISize fSize;
+        size_t fBufferAlignment;
         PixelTransferResult fYTransfer;
         PixelTransferResult fUTransfer;
         PixelTransferResult fVTransfer;
@@ -1068,6 +1073,7 @@
                                             callbackContext,
                                             dContext->priv().clientMappedBufferManager(),
                                             dstSize,
+                                            this->caps()->transferBufferAlignment(),
                                             std::move(yTransfer),
                                             std::move(uTransfer),
                                             std::move(vTransfer)};
@@ -1076,12 +1082,14 @@
         auto manager = context->fMappedBufferManager;
         auto result = std::make_unique<AsyncReadResult>(manager->owningDirectContext());
         size_t rowBytes = SkToSizeT(context->fSize.width());
+        rowBytes = GrAlignTo(rowBytes, context->fBufferAlignment);
         if (!result->addTransferResult(context->fYTransfer, context->fSize, rowBytes, manager)) {
             (*context->fClientCallback)(context->fClientContext, nullptr);
             delete context;
             return;
         }
-        rowBytes /= 2;
+        rowBytes = SkToSizeT(context->fSize.width()) / 2;
+        rowBytes = GrAlignTo(rowBytes, context->fBufferAlignment);
         SkISize uvSize = {context->fSize.width() / 2, context->fSize.height() / 2};
         if (!result->addTransferResult(context->fUTransfer, uvSize, rowBytes, manager)) {
             (*context->fClientCallback)(context->fClientContext, nullptr);
@@ -1338,6 +1346,7 @@
     }
 
     size_t rowBytes = GrColorTypeBytesPerPixel(supportedRead.fColorType) * rect.width();
+    rowBytes = GrAlignTo(rowBytes, this->caps()->transferBufferAlignment());
     size_t size = rowBytes * rect.height();
     // By using kStream_GrAccessPattern here, we are not able to cache and reuse the buffer for
     // multiple reads. Switching to kDynamic_GrAccessPattern would allow for this, however doing
diff --git a/src/gpu/d3d/GrD3DBuffer.cpp b/src/gpu/d3d/GrD3DBuffer.cpp
index f291e73..3e1adf2 100644
--- a/src/gpu/d3d/GrD3DBuffer.cpp
+++ b/src/gpu/d3d/GrD3DBuffer.cpp
@@ -112,26 +112,30 @@
     fResourceState = newResourceState;
 }
 
-void GrD3DBuffer::onRelease() {
-    if (!this->wasDestroyed()) {
-        VALIDATE();
-        // Note: we intentionally don't release the d3d resource here since it may still be in use
-        // by the gpu and a call to GrContext::release could get us in here.
-        fMapPtr = nullptr;
-        VALIDATE();
+void GrD3DBuffer::releaseResource() {
+    if (this->wasDestroyed()) {
+        return;
     }
-    INHERITED::onRelease();
+
+    if (fMapPtr) {
+        this->internalUnmap(this->size());
+        fMapPtr = nullptr;
+    }
+
+    SkASSERT(fD3DResource);
+    SkASSERT(fAlloc);
+    fD3DResource.reset();
+    fAlloc.reset();
+}
+
+void GrD3DBuffer::onRelease() {
+    this->releaseResource();
+    this->INHERITED::onRelease();
 }
 
 void GrD3DBuffer::onAbandon() {
-    if (!this->wasDestroyed()) {
-        VALIDATE();
-        // Note: we intentionally don't release the d3d resource here since it may still be in use
-        // by the gpu and a call to GrContext::abandon could get us in here.
-        fMapPtr = nullptr;
-        VALIDATE();
-    }
-    INHERITED::onAbandon();
+    this->releaseResource();
+    this->INHERITED::onAbandon();
 }
 
 void GrD3DBuffer::onMap() {
diff --git a/src/gpu/d3d/GrD3DBuffer.h b/src/gpu/d3d/GrD3DBuffer.h
index 944d588..f69d8d0 100644
--- a/src/gpu/d3d/GrD3DBuffer.h
+++ b/src/gpu/d3d/GrD3DBuffer.h
@@ -38,6 +38,8 @@
     D3D12_RESOURCE_STATES fResourceState;
 
 private:
+    void releaseResource();
+
     void onMap() override;
     void onUnmap() override;
     bool onUpdateData(const void* src, size_t srcSizeInBytes) override;
diff --git a/src/gpu/d3d/GrD3DCaps.cpp b/src/gpu/d3d/GrD3DCaps.cpp
index 940c7aa..60fed80 100644
--- a/src/gpu/d3d/GrD3DCaps.cpp
+++ b/src/gpu/d3d/GrD3DCaps.cpp
@@ -45,13 +45,14 @@
     fReadPixelsRowBytesSupport = true;
     fWritePixelsRowBytesSupport = true;
 
-    // TODO: implement these
-    fTransferFromBufferToTextureSupport = false;
-    fTransferFromSurfaceToBufferSupport = false;
+    fTransferFromBufferToTextureSupport = true;
+    fTransferFromSurfaceToBufferSupport = true;
 
     fMaxRenderTargetSize = 16384;  // minimum required by feature level 11_0
     fMaxTextureSize = 16384;       // minimum required by feature level 11_0
 
+    fTransferBufferAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+
     // TODO: implement
     fDynamicStateArrayGeometryProcessorTextureSupport = false;
 
@@ -889,7 +890,6 @@
         return { GrColorType::kUnknown, 0 };
     }
 
-    // TODO: this seems to be pretty constrictive, confirm
     // Any buffer data needs to be aligned to 512 bytes and that of a single texel.
     size_t offsetAlignment = GrAlignTo(GrDxgiFormatBytesPerBlock(dxgiFormat),
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
@@ -1019,8 +1019,8 @@
                                                         : GrColorType::kRGBA_8888, 0 };
     }
 
-    // Any subresource buffer data we copy to needs to be aligned to 256 bytes.
-    size_t offsetAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+    // The buffer offset of any subresource data we copy to needs to be aligned to 512 bytes.
+    size_t offsetAlignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
 
     const auto& info = this->getFormatInfo(dxgiFormat);
     for (int i = 0; i < info.fColorTypeInfoCount; ++i) {
diff --git a/src/gpu/d3d/GrD3DGpu.cpp b/src/gpu/d3d/GrD3DGpu.cpp
index e947a41..1567e91 100644
--- a/src/gpu/d3d/GrD3DGpu.cpp
+++ b/src/gpu/d3d/GrD3DGpu.cpp
@@ -565,7 +565,6 @@
         return false;
     }
 
-    // Set up src location and box
     GrD3DTextureResource* texResource = nullptr;
     GrD3DRenderTarget* rt = static_cast<GrD3DRenderTarget*>(surface->asRenderTarget());
     if (rt) {
@@ -578,6 +577,43 @@
         return false;
     }
 
+    D3D12_RESOURCE_DESC desc = texResource->d3dResource()->GetDesc();
+    D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedFootprint;
+    UINT64 transferTotalBytes;
+    fDevice->GetCopyableFootprints(&desc, 0, 1, 0, &placedFootprint,
+                                   nullptr, nullptr, &transferTotalBytes);
+    SkASSERT(transferTotalBytes);
+    // TODO: implement some way of reusing buffers instead of making a new one every time.
+    sk_sp<GrGpuBuffer> transferBuffer = this->createBuffer(transferTotalBytes,
+                                                           GrGpuBufferType::kXferGpuToCpu,
+                                                           kDynamic_GrAccessPattern);
+
+    this->readOrTransferPixels(texResource, left, top, width, height, transferBuffer,
+                               placedFootprint);
+    this->submitDirectCommandList(SyncQueue::kForce);
+
+    // Copy back to CPU buffer
+    size_t bpp = GrColorTypeBytesPerPixel(dstColorType);
+    if (GrDxgiFormatBytesPerBlock(texResource->dxgiFormat()) != bpp) {
+        return false;
+    }
+    size_t tightRowBytes = bpp * width;
+
+    const void* mappedMemory = transferBuffer->map();
+
+    SkRectMemcpy(buffer, rowBytes, mappedMemory, placedFootprint.Footprint.RowPitch,
+                 tightRowBytes, height);
+
+    transferBuffer->unmap();
+
+    return true;
+}
+
+void GrD3DGpu::readOrTransferPixels(GrD3DTextureResource* texResource,
+                                    int left, int top, int width, int height,
+                                    sk_sp<GrGpuBuffer> transferBuffer,
+                                    const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& placedFootprint) {
+    // Set up src location and box
     D3D12_TEXTURE_COPY_LOCATION srcLocation = {};
     srcLocation.pResource = texResource->d3dResource();
     SkASSERT(srcLocation.pResource);
@@ -592,25 +628,10 @@
     srcBox.front = 0;
     srcBox.back = 1;
 
-    // Set up dst location and create transfer buffer
+    // Set up dst location
     D3D12_TEXTURE_COPY_LOCATION dstLocation = {};
     dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
-    UINT64 transferTotalBytes;
-    const UINT64 baseOffset = 0;
-    D3D12_RESOURCE_DESC desc = srcLocation.pResource->GetDesc();
-    fDevice->GetCopyableFootprints(&desc, 0, 1, baseOffset, &dstLocation.PlacedFootprint,
-                                   nullptr, nullptr, &transferTotalBytes);
-    SkASSERT(transferTotalBytes);
-    size_t bpp = GrColorTypeBytesPerPixel(dstColorType);
-    if (GrDxgiFormatBytesPerBlock(texResource->dxgiFormat()) != bpp) {
-        return false;
-    }
-    size_t tightRowBytes = bpp * width;
-
-    // TODO: implement some way of reusing buffers instead of making a new one every time.
-    sk_sp<GrGpuBuffer> transferBuffer = this->createBuffer(transferTotalBytes,
-                                                           GrGpuBufferType::kXferGpuToCpu,
-                                                           kDynamic_GrAccessPattern);
+    dstLocation.PlacedFootprint = placedFootprint;
     GrD3DBuffer* d3dBuf = static_cast<GrD3DBuffer*>(transferBuffer.get());
     dstLocation.pResource = d3dBuf->d3dResource();
 
@@ -620,16 +641,6 @@
     fCurrentDirectCommandList->copyTextureRegionToBuffer(transferBuffer, &dstLocation, 0, 0,
                                                          texResource->resource(), &srcLocation,
                                                          &srcBox);
-    this->submitDirectCommandList(SyncQueue::kForce);
-
-    const void* mappedMemory = transferBuffer->map();
-
-    SkRectMemcpy(buffer, rowBytes, mappedMemory, dstLocation.PlacedFootprint.Footprint.RowPitch,
-                 tightRowBytes, height);
-
-    transferBuffer->unmap();
-
-    return true;
 }
 
 bool GrD3DGpu::onWritePixels(GrSurface* surface, int left, int top, int width, int height,
@@ -756,6 +767,122 @@
     return true;
 }
 
+bool GrD3DGpu::onTransferPixelsTo(GrTexture* texture, int left, int top, int width, int height,
+                                  GrColorType surfaceColorType, GrColorType bufferColorType,
+                                  sk_sp<GrGpuBuffer> transferBuffer, size_t bufferOffset,
+                                  size_t rowBytes) {
+    if (!this->currentCommandList()) {
+        return false;
+    }
+
+    if (!transferBuffer) {
+        return false;
+    }
+
+    size_t bpp = GrColorTypeBytesPerPixel(bufferColorType);
+    if (GrBackendFormatBytesPerPixel(texture->backendFormat()) != bpp) {
+        return false;
+    }
+
+    // D3D requires offsets for texture transfers to be aligned to this value
+    if (SkToBool(bufferOffset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT-1))) {
+        return false;
+    }
+
+    GrD3DTexture* d3dTex = static_cast<GrD3DTexture*>(texture);
+    if (!d3dTex) {
+        return false;
+    }
+
+    SkDEBUGCODE(DXGI_FORMAT format = d3dTex->dxgiFormat());
+
+    // Can't transfer compressed data
+    SkASSERT(!GrDxgiFormatIsCompressed(format));
+
+    SkASSERT(GrDxgiFormatBytesPerBlock(format) == GrColorTypeBytesPerPixel(bufferColorType));
+
+    SkDEBUGCODE(
+        SkIRect subRect = SkIRect::MakeXYWH(left, top, width, height);
+        SkIRect bounds = SkIRect::MakeWH(texture->width(), texture->height());
+        SkASSERT(bounds.contains(subRect));
+        )
+
+    // Set up copy region
+    D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedFootprint = {};
+    ID3D12Resource* d3dResource = d3dTex->d3dResource();
+    SkASSERT(d3dResource);
+    D3D12_RESOURCE_DESC desc = d3dResource->GetDesc();
+    desc.Width = width;
+    desc.Height = height;
+    UINT64 totalBytes;
+    fDevice->GetCopyableFootprints(&desc, 0, 1, 0, &placedFootprint,
+                                   nullptr, nullptr, &totalBytes);
+    placedFootprint.Offset = bufferOffset;
+
+    // Change state of our target so it can be copied to
+    d3dTex->setResourceState(this, D3D12_RESOURCE_STATE_COPY_DEST);
+
+    // Copy the buffer to the image.
+    ID3D12Resource* d3dBuffer = static_cast<GrD3DBuffer*>(transferBuffer.get())->d3dResource();
+    fCurrentDirectCommandList->copyBufferToTexture(d3dBuffer, d3dTex, 1,
+                                                   &placedFootprint, left, top);
+    this->currentCommandList()->addGrBuffer(std::move(transferBuffer));
+
+    d3dTex->markMipmapsDirty();
+    return true;
+}
+
+bool GrD3DGpu::onTransferPixelsFrom(GrSurface* surface, int left, int top, int width, int height,
+                                    GrColorType surfaceColorType, GrColorType bufferColorType,
+                                    sk_sp<GrGpuBuffer> transferBuffer, size_t offset) {
+    if (!this->currentCommandList()) {
+        return false;
+    }
+    SkASSERT(surface);
+    SkASSERT(transferBuffer);
+    // TODO: decide whether transfers must be rejected for protected contexts, e.g.:
+    //if (fProtectedContext == GrProtected::kYes) {
+    //    return false;
+    //}
+
+    // D3D requires offsets for texture transfers to be aligned to this value
+    if (SkToBool(offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT-1))) {
+        return false;
+    }
+
+    GrD3DTextureResource* texResource = nullptr;
+    GrD3DRenderTarget* rt = static_cast<GrD3DRenderTarget*>(surface->asRenderTarget());
+    if (rt) {
+        texResource = rt;
+    } else {
+        texResource = static_cast<GrD3DTexture*>(surface->asTexture());
+    }
+
+    if (!texResource) {
+        return false;
+    }
+
+    SkDEBUGCODE(DXGI_FORMAT format = texResource->dxgiFormat());
+    SkASSERT(GrDxgiFormatBytesPerBlock(format) == GrColorTypeBytesPerPixel(bufferColorType));
+
+    D3D12_RESOURCE_DESC desc = texResource->d3dResource()->GetDesc();
+    desc.Width = width;
+    desc.Height = height;
+    D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedFootprint;
+    UINT64 transferTotalBytes;
+    fDevice->GetCopyableFootprints(&desc, 0, 1, offset, &placedFootprint,
+                                   nullptr, nullptr, &transferTotalBytes);
+    SkASSERT(transferTotalBytes);
+
+    this->readOrTransferPixels(texResource, left, top, width, height,
+                               transferBuffer, placedFootprint);
+
+    // TODO: It's not clear how to ensure the transfer is done before we read from the buffer,
+    // other than maybe doing a resource state transition.
+
+    return true;
+}
+
 static bool check_resource_info(const GrD3DTextureResourceInfo& info) {
     if (!info.fResource.get()) {
         return false;
diff --git a/src/gpu/d3d/GrD3DGpu.h b/src/gpu/d3d/GrD3DGpu.h
index 9a9ece2..d4a51bf 100644
--- a/src/gpu/d3d/GrD3DGpu.h
+++ b/src/gpu/d3d/GrD3DGpu.h
@@ -177,14 +177,11 @@
     bool onTransferPixelsTo(GrTexture* texture, int left, int top, int width, int height,
                             GrColorType surfaceColorType, GrColorType bufferColorType,
                             sk_sp<GrGpuBuffer> transferBuffer, size_t offset,
-                            size_t rowBytes) override {
-        return true;
-    }
+                            size_t rowBytes) override;
     bool onTransferPixelsFrom(GrSurface* surface, int left, int top, int width, int height,
                               GrColorType surfaceColorType, GrColorType bufferColorType,
-                              sk_sp<GrGpuBuffer> transferBuffer, size_t offset) override {
-        return true;
-    }
+                              sk_sp<GrGpuBuffer> transferBuffer, size_t offset) override;
+
     bool onCopySurface(GrSurface* dst, GrSurface* src, const SkIRect& srcRect,
                        const SkIPoint& dstPoint) override;
 
@@ -259,6 +256,11 @@
     bool uploadToTexture(GrD3DTexture* tex, int left, int top, int width, int height,
                          GrColorType colorType, const GrMipLevel* texels, int mipLevelCount);
 
+    void readOrTransferPixels(GrD3DTextureResource* texResource,
+                              int left, int top, int width, int height,
+                              sk_sp<GrGpuBuffer> transferBuffer,
+                              const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& placedFootprint);
+
     bool createTextureResourceForBackendSurface(DXGI_FORMAT dxgiFormat,
                                                 SkISize dimensions,
                                                 GrTexturable texturable,
diff --git a/tests/ReadWritePixelsGpuTest.cpp b/tests/ReadWritePixelsGpuTest.cpp
index f3d4070..c8036f6 100644
--- a/tests/ReadWritePixelsGpuTest.cpp
+++ b/tests/ReadWritePixelsGpuTest.cpp
@@ -707,9 +707,10 @@
                               ShutdownSequence::kAbandon_FreeResult_DestroyContext,
                               ShutdownSequence::kReleaseAndAbandon_DestroyContext_FreeResult,
                               ShutdownSequence::kAbandon_DestroyContext_FreeResult}) {
-            // Vulkan context abandoning without resource release has issues outside of the scope of
-            // this test.
-            if (type == sk_gpu_test::GrContextFactory::kVulkan_ContextType &&
+            // Abandoning a Vulkan or D3D context without releasing its resources has issues
+            // outside of the scope of this test.
+            if ((type == sk_gpu_test::GrContextFactory::kVulkan_ContextType ||
+                 type == sk_gpu_test::GrContextFactory::kDirect3D_ContextType) &&
                 (sequence == ShutdownSequence::kFreeResult_ReleaseAndAbandon_DestroyContext ||
                  sequence == ShutdownSequence::kFreeResult_Abandon_DestroyContext ||
                  sequence == ShutdownSequence::kReleaseAndAbandon_FreeResult_DestroyContext ||
diff --git a/tests/TransferPixelsTest.cpp b/tests/TransferPixelsTest.cpp
index 01a75f2..b832978 100644
--- a/tests/TransferPixelsTest.cpp
+++ b/tests/TransferPixelsTest.cpp
@@ -24,11 +24,11 @@
 
 using sk_gpu_test::GrContextFactory;
 
-void fill_transfer_data(int left, int top, int width, int height, int bufferWidth,
+void fill_transfer_data(int left, int top, int width, int height, int rowBytes,
                         GrColorType dstType, char* dst) {
     size_t dstBpp = GrColorTypeBytesPerPixel(dstType);
-    auto dstLocation = [dst, dstBpp, bufferWidth](int x, int y) {
-        return dst + y * dstBpp * bufferWidth + x * dstBpp;
+    auto dstLocation = [dst, dstBpp, rowBytes](int x, int y) {
+        return dst + y * rowBytes + x * dstBpp;
     };
     // build red-green gradient
     for (int j = top; j < top + height; ++j) {
@@ -143,10 +143,12 @@
     // either of which may differ from 'colorType'.
     GrCaps::SupportedWrite allowedSrc =
             caps->supportedWritePixelsColorType(colorType, tex->backendFormat(), colorType);
-    size_t srcRowBytes = GrColorTypeBytesPerPixel(allowedSrc.fColorType) * srcBufferWidth;
+    size_t srcRowBytes = GrAlignTo(GrColorTypeBytesPerPixel(allowedSrc.fColorType) * srcBufferWidth,
+                                   caps->transferBufferAlignment());
+
     std::unique_ptr<char[]> srcData(new char[kTexDims.fHeight * srcRowBytes]);
 
-    fill_transfer_data(0, 0, kTexDims.fWidth, kTexDims.fHeight, srcBufferWidth,
+    fill_transfer_data(0, 0, kTexDims.fWidth, kTexDims.fHeight, srcRowBytes,
                        allowedSrc.fColorType, srcData.get());
 
     // create and fill transfer buffer
@@ -209,20 +211,27 @@
     // with a left sub-rect inset of 2 but may adjust that so we can fulfill the transfer buffer
     // offset alignment requirement.
     int left = 2;
-    const int top = 10;
+    int top = 10;
     const int width = 10;
     const int height = 2;
     size_t offset = top * srcRowBytes + left * GrColorTypeBytesPerPixel(allowedSrc.fColorType);
     while (offset % allowedSrc.fOffsetAlignmentForTransferBuffer) {
         offset += GrColorTypeBytesPerPixel(allowedSrc.fColorType);
         ++left;
-        // We're assuming that the required alignment is 1 or a small multiple of the bpp, which
-        // it is currently for all color types across all backends.
+        // In most cases we assume that the required alignment is 1 or a small multiple of the bpp,
+        // which it is for color types across all current backends except Direct3D. To correct for
+        // Direct3D's large alignment requirement we may adjust the top location as well.
+        if (left + width > tex->width()) {
+            left = 0;
+            ++top;
+            offset = top * srcRowBytes;
+        }
         SkASSERT(left + width <= tex->width());
+        SkASSERT(top + height <= tex->height());
     }
 
     // change color of subrectangle
-    fill_transfer_data(left, top, width, height, srcBufferWidth, allowedSrc.fColorType,
+    fill_transfer_data(left, top, width, height, srcRowBytes, allowedSrc.fColorType,
                        srcData.get());
     data = buffer->map();
     memcpy(data, srcData.get(), size);
@@ -231,8 +240,6 @@
     result = gpu->transferPixelsTo(tex.get(), left, top, width, height, colorType,
                                    allowedSrc.fColorType, buffer, offset, srcRowBytes);
     if (!result) {
-        gpu->transferPixelsTo(tex.get(), left, top, width, height, colorType, allowedSrc.fColorType,
-                              buffer, offset, srcRowBytes);
         ERRORF(reporter, "Could not transfer pixels to texture, color type: %d",
                static_cast<int>(colorType));
         return;
@@ -279,7 +286,7 @@
     size_t textureDataBpp = GrColorTypeBytesPerPixel(colorType);
     size_t textureDataRowBytes = kTexDims.fWidth * textureDataBpp;
     std::unique_ptr<char[]> textureData(new char[kTexDims.fHeight * textureDataRowBytes]);
-    fill_transfer_data(0, 0, kTexDims.fWidth, kTexDims.fHeight, kTexDims.fHeight, colorType,
+    fill_transfer_data(0, 0, kTexDims.fWidth, kTexDims.fHeight, textureDataRowBytes, colorType,
                        textureData.get());
     GrMipLevel data;
     data.fPixels = textureData.get();
@@ -308,8 +315,8 @@
     GrImageInfo readInfo(allowedRead.fColorType, kUnpremul_SkAlphaType, nullptr, kTexDims);
 
     size_t bpp = GrColorTypeBytesPerPixel(allowedRead.fColorType);
-    size_t fullBufferRowBytes = kTexDims.fWidth * bpp;
-    size_t partialBufferRowBytes = kPartialWidth * bpp;
+    size_t fullBufferRowBytes = GrAlignTo(kTexDims.fWidth * bpp, caps->transferBufferAlignment());
+    size_t partialBufferRowBytes = GrAlignTo(kPartialWidth * bpp, caps->transferBufferAlignment());
     size_t offsetAlignment = allowedRead.fOffsetAlignmentForTransferBuffer;
     SkASSERT(offsetAlignment);
 
@@ -318,7 +325,8 @@
     static constexpr size_t kStartingOffset = 11;
     size_t partialReadOffset = kStartingOffset +
                                (offsetAlignment - kStartingOffset%offsetAlignment)%offsetAlignment;
-    bufferSize = std::max(bufferSize, partialReadOffset + partialBufferRowBytes * kPartialHeight);
+    bufferSize = std::max(bufferSize,
+                          partialReadOffset + partialBufferRowBytes * kPartialHeight);
 
     sk_sp<GrGpuBuffer> buffer(resourceProvider->createBuffer(
             bufferSize, GrGpuBufferType::kXferGpuToCpu, kDynamic_GrAccessPattern));