Dawn: implement staging buffer manager.

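Use managed staging buffers for texture uploads, uniform uploads and
GrDawnBuffer map/unmap/updateData, replacing the per-upload SetSubData()
calls and temporary heap allocations. Staging buffers are pooled by
power-of-two size and are re-mapped asynchronously after each queue
submit so they can be reused.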

Change-Id: I063707c160236725d27a1d1bfb197d3096a07b34
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/238120
Commit-Queue: Stephen White <senorblanco@chromium.org>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/gn/gpu.gni b/gn/gpu.gni
index 46892b4..d5844a2 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -725,6 +725,8 @@
   "$_src/gpu/dawn/GrDawnRingBuffer.h",
   "$_src/gpu/dawn/GrDawnStencilAttachment.cpp",
   "$_src/gpu/dawn/GrDawnStencilAttachment.h",
+  "$_src/gpu/dawn/GrDawnStagingManager.cpp",
+  "$_src/gpu/dawn/GrDawnStagingManager.h",
   "$_src/gpu/dawn/GrDawnTexture.cpp",
   "$_src/gpu/dawn/GrDawnTexture.h",
   "$_src/gpu/dawn/GrDawnTextureRenderTarget.cpp",
diff --git a/src/gpu/dawn/GrDawnBuffer.cpp b/src/gpu/dawn/GrDawnBuffer.cpp
index 4815e36..825a3ce 100644
--- a/src/gpu/dawn/GrDawnBuffer.cpp
+++ b/src/gpu/dawn/GrDawnBuffer.cpp
@@ -30,7 +30,7 @@
 GrDawnBuffer::GrDawnBuffer(GrDawnGpu* gpu, size_t sizeInBytes, GrGpuBufferType type,
                            GrAccessPattern pattern)
     : INHERITED(gpu, sizeInBytes, type, pattern)
-    , fData(nullptr) {
+    , fStagingBuffer(nullptr) {
     dawn::BufferDescriptor bufferDesc;
     bufferDesc.size = sizeInBytes;
     bufferDesc.usage = GrGpuBufferTypeToDawnUsageBit(type) | dawn::BufferUsageBit::CopyDst;
@@ -39,31 +39,42 @@
 }
 
 GrDawnBuffer::~GrDawnBuffer() {
-    delete[] fData;
 }
 
 void GrDawnBuffer::onMap() {
     if (this->wasDestroyed()) {
         return;
     }
-    fData = new char[this->size()];
-    fMapPtr = fData;
+    // Borrow a staging buffer covering the whole buffer and expose its memory via fMapPtr.
+    fStagingBuffer = getDawnGpu()->getStagingBuffer(this->size());
+    fMapPtr = fStagingBuffer->fData;
 }
 
 void GrDawnBuffer::onUnmap() {
     if (this->wasDestroyed()) {
         return;
     }
-    fBuffer.SetSubData(0, this->size(), reinterpret_cast<const uint8_t*>(fData));
-    delete[] fData;
-    fData = nullptr;
+    // Unmap the staging memory, then record a copy from it into fBuffer on the copy encoder.
+    fStagingBuffer->fBuffer.Unmap();
+    fMapPtr = nullptr;
+    getDawnGpu()->getCopyEncoder()
+        .CopyBufferToBuffer(fStagingBuffer->fBuffer, 0, fBuffer, 0, this->size());
 }
 
 bool GrDawnBuffer::onUpdateData(const void* src, size_t srcSizeInBytes) {
     if (this->wasDestroyed()) {
         return false;
     }
-    fBuffer.SetSubData(0, srcSizeInBytes, static_cast<const uint8_t*>(src));
+    // Stage and copy exactly srcSizeInBytes, as the SetSubData() call this replaces did.
+    // Going through onMap()/onUnmap() would copy this->size() bytes and overwrite the
+    // tail of fBuffer with whatever the recycled staging buffer held.
+    // NOTE(review): confirm Dawn accepts copy sizes that are not a multiple of 4.
+    GrDawnStagingBuffer* staging = getDawnGpu()->getStagingBuffer(srcSizeInBytes);
+    memcpy(staging->fData, src, srcSizeInBytes);
+    staging->fBuffer.Unmap();
+    staging->fData = nullptr;  // Not writable again until the manager re-maps it.
+    getDawnGpu()->getCopyEncoder()
+        .CopyBufferToBuffer(staging->fBuffer, 0, fBuffer, 0, srcSizeInBytes);
     return true;
 }
 
diff --git a/src/gpu/dawn/GrDawnBuffer.h b/src/gpu/dawn/GrDawnBuffer.h
index 96dc4e2..17fa1c8 100644
--- a/src/gpu/dawn/GrDawnBuffer.h
+++ b/src/gpu/dawn/GrDawnBuffer.h
@@ -12,6 +12,7 @@
 #include "dawn/dawncpp.h"
 
 class GrDawnGpu;
+struct GrDawnStagingBuffer;
 
 class GrDawnBuffer : public GrGpuBuffer {
 public:
@@ -27,7 +28,7 @@
 
 private:
     dawn::Buffer fBuffer;
-    char* fData;          // Used only for map/unmap.
+    GrDawnStagingBuffer* fStagingBuffer;
     typedef GrGpuBuffer INHERITED;
 };
 
diff --git a/src/gpu/dawn/GrDawnGpu.cpp b/src/gpu/dawn/GrDawnGpu.cpp
index 0aba404..9bfa34c 100644
--- a/src/gpu/dawn/GrDawnGpu.cpp
+++ b/src/gpu/dawn/GrDawnGpu.cpp
@@ -102,7 +102,8 @@
         , fQueue(device.CreateQueue())
         , fCompiler(new SkSL::Compiler())
         , fUniformRingBuffer(this, dawn::BufferUsageBit::Uniform)
-        , fRenderPipelineCache(kMaxRenderPipelineEntries) {
+        , fRenderPipelineCache(kMaxRenderPipelineEntries)
+        , fStagingManager(fDevice) {
     fCaps.reset(new GrDawnCaps(options));
 }
 
@@ -354,21 +355,21 @@
         size_t origRowBytes = bpp * w;
         size_t rowBytes = GrDawnRoundRowBytes(origRowBytes);
         size_t size = rowBytes * h;
-        dawn::BufferDescriptor bufferDesc;
-        bufferDesc.size = size;
-        bufferDesc.usage = dawn::BufferUsageBit::CopySrc | dawn::BufferUsageBit::CopyDst;
-        dawn::Buffer buffer = this->device().CreateBuffer(&bufferDesc);
-        const uint8_t* src = static_cast<const uint8_t*>(pixels);
+        GrDawnStagingBuffer* stagingBuffer = this->getStagingBuffer(size);
         if (rowBytes == origRowBytes) {
-            buffer.SetSubData(0, size, src);
+            memcpy(stagingBuffer->fData, pixels, size);
         } else {
-            uint32_t offset = 0;
+            const char* src = static_cast<const char*>(pixels);
+            char* dst = static_cast<char*>(stagingBuffer->fData);
             for (int row = 0; row < h; row++) {
-                buffer.SetSubData(offset, origRowBytes, src);
-                offset += rowBytes;
+                memcpy(dst, src, origRowBytes);
+                dst += rowBytes;
                 src += origRowBytes;
             }
         }
+        dawn::Buffer buffer = stagingBuffer->fBuffer;
+        buffer.Unmap();
+        stagingBuffer->fData = nullptr;
         dawn::BufferCopyView srcBuffer;
         srcBuffer.buffer = buffer;
         srcBuffer.offset = 0;
@@ -458,6 +459,8 @@
     this->flushCopyEncoder();
     fQueue.Submit(fCommandBuffers.size(), &fCommandBuffers.front());
     fCommandBuffers.clear();
+    fStagingManager.mapBusyList();
+    fDevice.Tick();
 }
 
 void GrDawnGpu::onFinishFlush(GrSurfaceProxy*[], int n, SkSurface::BackendSurfaceAccess access,
@@ -644,6 +647,10 @@
     return fUniformRingBuffer.allocate(size);
 }
 
+GrDawnStagingBuffer* GrDawnGpu::getStagingBuffer(size_t size) {
+    return fStagingManager.findOrCreateStagingBuffer(size);
+}
+
 void GrDawnGpu::appendCommandBuffer(dawn::CommandBuffer commandBuffer) {
     if (commandBuffer) {
         fCommandBuffers.push_back(commandBuffer);
diff --git a/src/gpu/dawn/GrDawnGpu.h b/src/gpu/dawn/GrDawnGpu.h
index 42bd486..059fd38 100644
--- a/src/gpu/dawn/GrDawnGpu.h
+++ b/src/gpu/dawn/GrDawnGpu.h
@@ -12,6 +12,7 @@
 #include "dawn/dawncpp.h"
 #include "src/core/SkLRUCache.h"
 #include "src/gpu/dawn/GrDawnRingBuffer.h"
+#include "src/gpu/dawn/GrDawnStagingManager.h"
 
 class GrDawnOpsRenderPass;
 class GrPipeline;
@@ -94,6 +95,8 @@
                                                    GrPrimitiveType primitiveType);
 
     GrDawnRingBuffer::Slice allocateUniformRingBufferSlice(int size);
+    GrDawnStagingBuffer* getStagingBuffer(size_t size);
+    GrDawnStagingManager* getStagingManager() { return &fStagingManager; }
     dawn::CommandEncoder getCopyEncoder();
     void flushCopyEncoder();
     void appendCommandBuffer(dawn::CommandBuffer commandBuffer);
@@ -173,6 +176,7 @@
     };
 
     SkLRUCache<GrProgramDesc, sk_sp<GrDawnProgram>, ProgramDescHash>    fRenderPipelineCache;
+    GrDawnStagingManager fStagingManager;
 
     typedef GrGpu INHERITED;
 };
diff --git a/src/gpu/dawn/GrDawnProgramBuilder.cpp b/src/gpu/dawn/GrDawnProgramBuilder.cpp
index 9d68b77..30661b1 100644
--- a/src/gpu/dawn/GrDawnProgramBuilder.cpp
+++ b/src/gpu/dawn/GrDawnProgramBuilder.cpp
@@ -587,7 +587,7 @@
         GrFragmentProcessor::TextureSampler sampler(sk_ref_sp(proxy));
         setTexture(gpu, sampler.samplerState(), sampler.peekTexture(), &bindings, &binding);
     }
-    fDataManager.uploadUniformBuffers(geom, frag);
+    fDataManager.uploadUniformBuffers(gpu, geom, frag);
     dawn::BindGroupDescriptor descriptor;
     descriptor.layout = fBindGroupLayout;
     descriptor.bindingCount = bindings.size();
diff --git a/src/gpu/dawn/GrDawnProgramDataManager.cpp b/src/gpu/dawn/GrDawnProgramDataManager.cpp
index b2bb013..573cb98 100644
--- a/src/gpu/dawn/GrDawnProgramDataManager.cpp
+++ b/src/gpu/dawn/GrDawnProgramDataManager.cpp
@@ -263,17 +263,26 @@
     }
 };
 
-void GrDawnProgramDataManager::uploadUniformBuffers(GrDawnRingBuffer::Slice geometryBuffer,
+void GrDawnProgramDataManager::uploadUniformBuffers(GrDawnGpu* gpu,
+                                                    GrDawnRingBuffer::Slice geometryBuffer,
                                                     GrDawnRingBuffer::Slice fragmentBuffer) const {
-
     dawn::Buffer geom = geometryBuffer.fBuffer;
+    uint32_t geomOffset = geometryBuffer.fOffset;
     dawn::Buffer frag = fragmentBuffer.fBuffer;
+    uint32_t fragOffset = fragmentBuffer.fOffset;
+    auto copyEncoder = gpu->getCopyEncoder();
     if (geom && fGeometryUniformsDirty) {
-        geom.SetSubData(geometryBuffer.fOffset, fGeometryUniformSize,
-                        static_cast<const uint8_t*>(fGeometryUniformData.get()));
+        GrDawnStagingBuffer* stagingBuffer = gpu->getStagingBuffer(fGeometryUniformSize);
+        memcpy(stagingBuffer->fData, fGeometryUniformData.get(), fGeometryUniformSize);
+        stagingBuffer->fBuffer.Unmap();
+        copyEncoder
+            .CopyBufferToBuffer(stagingBuffer->fBuffer, 0, geom, geomOffset, fGeometryUniformSize);
     }
     if (frag && fFragmentUniformsDirty) {
-        frag.SetSubData(fragmentBuffer.fOffset, fFragmentUniformSize,
-                        static_cast<const uint8_t*>(fFragmentUniformData.get()));
+        GrDawnStagingBuffer* stagingBuffer = gpu->getStagingBuffer(fFragmentUniformSize);
+        memcpy(stagingBuffer->fData, fFragmentUniformData.get(), fFragmentUniformSize);
+        stagingBuffer->fBuffer.Unmap();
+        copyEncoder
+            .CopyBufferToBuffer(stagingBuffer->fBuffer, 0, frag, fragOffset, fFragmentUniformSize);
     }
 }
diff --git a/src/gpu/dawn/GrDawnProgramDataManager.h b/src/gpu/dawn/GrDawnProgramDataManager.h
index d74f1ee..b77d5ff 100644
--- a/src/gpu/dawn/GrDawnProgramDataManager.h
+++ b/src/gpu/dawn/GrDawnProgramDataManager.h
@@ -58,7 +58,8 @@
         SK_ABORT("Only supported in NVPR, which is not in Dawn");
     }
 
-    void uploadUniformBuffers(GrDawnRingBuffer::Slice geometryBuffer,
+    void uploadUniformBuffers(GrDawnGpu* gpu,
+                              GrDawnRingBuffer::Slice geometryBuffer,
                               GrDawnRingBuffer::Slice fragmentBuffer) const;
 
     uint32_t geometryUniformSize() const { return fGeometryUniformSize; }
diff --git a/src/gpu/dawn/GrDawnStagingManager.cpp b/src/gpu/dawn/GrDawnStagingManager.cpp
new file mode 100644
index 0000000..0a6e8a1
--- /dev/null
+++ b/src/gpu/dawn/GrDawnStagingManager.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/gpu/dawn/GrDawnStagingManager.h"
+
+#include "src/core/SkMathPriv.h"
+
+GrDawnStagingManager::GrDawnStagingManager(dawn::Device device) : fDevice(device) {
+}
+
+GrDawnStagingManager::~GrDawnStagingManager() {
+    // Clean up any pending callbacks before destroying the StagingBuffers.
+    while (fWaitingCount > 0) {
+        fDevice.Tick();
+    }
+}
+
+// Returns a mapped staging buffer of at least |size| bytes and puts it on the busy list.
+// A ready buffer of the same power-of-two size class is reused when one exists; otherwise
+// a new buffer is created already mapped. The manager keeps ownership of the buffer.
+GrDawnStagingBuffer* GrDawnStagingManager::findOrCreateStagingBuffer(size_t size) {
+    size_t sizePow2 = GrNextPow2(size);
+    GrDawnStagingBuffer* stagingBuffer;
+    auto i = fReadyPool.find(sizePow2);
+    if (i != fReadyPool.end()) {
+        stagingBuffer = i->second;
+        fReadyPool.erase(i);
+    } else {
+        dawn::BufferDescriptor desc;
+        desc.usage = dawn::BufferUsageBit::MapWrite | dawn::BufferUsageBit::CopySrc;
+        desc.size = sizePow2;
+        dawn::CreateBufferMappedResult result = fDevice.CreateBufferMapped(&desc);
+        std::unique_ptr<GrDawnStagingBuffer> b(new GrDawnStagingBuffer(
+            this, result.buffer, sizePow2, result.data));
+        stagingBuffer = b.get();
+        fBuffers.push_back(std::move(b));
+    }
+    fBusyList.push_back(stagingBuffer);
+    return stagingBuffer;
+}
+
+// MapWriteAsync completion callback; |userData| is the GrDawnStagingBuffer that was mapped.
+static void callback(DawnBufferMapAsyncStatus status, void* data, uint64_t dataLength,
+                     void* userData) {
+    GrDawnStagingBuffer* buffer = static_cast<GrDawnStagingBuffer*>(userData);
+    // Only a successful map yields writable memory. Record null otherwise so that
+    // addToReadyPool() keeps the buffer out of circulation.
+    buffer->fData = (status == DAWN_BUFFER_MAP_ASYNC_STATUS_SUCCESS) ? data : nullptr;
+    if (buffer->fManager) {
+        buffer->fManager->addToReadyPool(buffer);
+    }
+}
+
+void GrDawnStagingManager::mapBusyList() {
+    // Map all buffers on the busy list for writing. When they're no longer in flight on the GPU,
+    // their callback will be called and they'll be moved to the ready pool.
+    for (GrDawnStagingBuffer* buffer : fBusyList) {
+        // Count the request before issuing it so the counter cannot be decremented first.
+        fWaitingCount++;
+        buffer->fBuffer.MapWriteAsync(callback, buffer);
+    }
+    fBusyList.clear();
+}
+
+// Retires one pending map request; |buffer| becomes reusable only if it is actually mapped.
+void GrDawnStagingManager::addToReadyPool(GrDawnStagingBuffer* buffer) {
+    fWaitingCount--;
+    if (!buffer->fData) {
+        // No mapped memory: a later memcpy into this buffer would write through null.
+        return;
+    }
+    fReadyPool.insert(std::pair<size_t, GrDawnStagingBuffer*>(buffer->fSize, buffer));
+}
diff --git a/src/gpu/dawn/GrDawnStagingManager.h b/src/gpu/dawn/GrDawnStagingManager.h
new file mode 100644
index 0000000..b3974ad
--- /dev/null
+++ b/src/gpu/dawn/GrDawnStagingManager.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrDawnStagingManager_DEFINED
+#define GrDawnStagingManager_DEFINED
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "dawn/dawncpp.h"
+
+struct GrDawnStagingBuffer;
+
+// Owns a pool of staging buffers and recycles them once they have been mapped again.
+class GrDawnStagingManager {
+public:
+    explicit GrDawnStagingManager(dawn::Device device);
+   ~GrDawnStagingManager();
+
+    // Returns a buffer of at least |size| bytes (sizes are rounded up to a power of two) and
+    // appends it to the busy list. The manager retains ownership of the returned buffer.
+    GrDawnStagingBuffer* findOrCreateStagingBuffer(size_t size);
+
+    // Called from the map callback to make |buffer| available for reuse.
+    void addToReadyPool(GrDawnStagingBuffer* buffer);
+    // Requests an asynchronous write mapping of every busy buffer, then empties the busy list.
+    void mapBusyList();
+
+private:
+    dawn::Device                                       fDevice;
+    // Owns every staging buffer this manager has created.
+    std::vector<std::unique_ptr<GrDawnStagingBuffer>>  fBuffers;
+    // Buffers available for reuse, keyed by their fSize.
+    std::multimap<size_t, GrDawnStagingBuffer*>        fReadyPool;
+    // Buffers handed out since the last mapBusyList() call.
+    std::vector<GrDawnStagingBuffer*>                  fBusyList;
+    // Number of map requests whose callback has not run yet.
+    int                                                fWaitingCount = 0;
+};
+
+// A staging buffer plus the bookkeeping the manager needs to recycle it.
+struct GrDawnStagingBuffer {
+    GrDawnStagingBuffer(GrDawnStagingManager* manager, dawn::Buffer buffer, size_t size,
+                       void* data)
+        : fManager(manager), fBuffer(buffer), fSize(size), fData(data) {}
+    ~GrDawnStagingBuffer() {
+        fManager = nullptr;
+    }
+    GrDawnStagingManager*  fManager;  // Manager notified by the map callback.
+    dawn::Buffer           fBuffer;
+    size_t                 fSize;     // Allocated size in bytes, as passed by the manager.
+    void*                  fData;     // Mapped memory; updated by the map callback.
+};
+
+#endif
diff --git a/src/gpu/dawn/GrDawnTexture.cpp b/src/gpu/dawn/GrDawnTexture.cpp
index ee9df7d..928c0b5 100644
--- a/src/gpu/dawn/GrDawnTexture.cpp
+++ b/src/gpu/dawn/GrDawnTexture.cpp
@@ -166,31 +166,23 @@
         }
         size_t rowBytes = GrDawnRoundRowBytes(origRowBytes);
         size_t size = rowBytes * height;
-
-        dawn::BufferDescriptor desc;
-        desc.usage = dawn::BufferUsageBit::CopyDst | dawn::BufferUsageBit::CopySrc;
-        desc.size = size;
-
-        dawn::Buffer stagingBuffer = device.CreateBuffer(&desc);
-
+        GrDawnStagingBuffer* stagingBuffer = getDawnGpu()->getStagingBuffer(size);
         if (rowBytes == origRowBytes) {
-            stagingBuffer.SetSubData(0, size,
-                static_cast<const uint8_t*>(static_cast<const void *>(src)));
+            memcpy(stagingBuffer->fData, src, size);
         } else {
-            char* buf = new char[size];
-            char* dst = buf;
+            char* dst = static_cast<char*>(stagingBuffer->fData);
             for (uint32_t row = 0; row < height; row++) {
                 memcpy(dst, src, origRowBytes);
                 dst += rowBytes;
                 src += texels[i].fRowBytes;
             }
-            stagingBuffer.SetSubData(0, size,
-                static_cast<const uint8_t*>(static_cast<const void*>(buf)));
-            delete[] buf;
         }
+        dawn::Buffer buffer = stagingBuffer->fBuffer;
+        buffer.Unmap();
+        stagingBuffer->fData = nullptr;
 
         dawn::BufferCopyView srcBuffer;
-        srcBuffer.buffer = stagingBuffer;
+        srcBuffer.buffer = buffer;
         srcBuffer.offset = 0;
         srcBuffer.rowPitch = rowBytes;
         srcBuffer.imageHeight = height;