Update command buffer and finishProc processing in Metal

* Restructure GrMtlGpu::submitCommandBuffer() to handle empty
command buffers better and ensure that finishedCallbacks are invoked.
* Move finishedCallbacks from GrMtlGpu to GrMtlCommandBuffer to ensure
they're really invoked on completion/deletion.

Bug: skia:10530
Change-Id: I9f642342f03f540e46fad62a311c4195be87eab8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/306936
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
diff --git a/src/gpu/mtl/GrMtlCommandBuffer.h b/src/gpu/mtl/GrMtlCommandBuffer.h
index 50373fb..e8cd34c 100644
--- a/src/gpu/mtl/GrMtlCommandBuffer.h
+++ b/src/gpu/mtl/GrMtlCommandBuffer.h
@@ -11,6 +11,7 @@
 #import <Metal/Metal.h>
 
 #include "include/core/SkRefCnt.h"
+#include "include/gpu/GrTypes.h"
 #include "src/gpu/GrBuffer.h"
 #include "src/gpu/mtl/GrMtlUtil.h"
 
@@ -23,7 +24,12 @@
     static sk_sp<GrMtlCommandBuffer> Make(id<MTLCommandQueue> queue);
     ~GrMtlCommandBuffer();
 
-    void commit(bool waitUntilCompleted);
+    bool commit(bool waitUntilCompleted);
+    bool hasWork() { return fHasWork; }
+
+    void addFinishedCallback(sk_sp<GrRefCntedCallback> callback) {
+        fFinishedCallbacks.push_back(std::move(callback));
+    }
 
     id<MTLBlitCommandEncoder> getBlitCommandEncoder();
     id<MTLRenderCommandEncoder> getRenderCommandEncoder(MTLRenderPassDescriptor*,
@@ -41,12 +47,18 @@
     void encodeSignalEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
     void encodeWaitForEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
 
+    void waitUntilCompleted() {
+        [fCmdBuffer waitUntilCompleted];
+    }
+    void callFinishedCallbacks() { fFinishedCallbacks.reset(); }
+
 private:
     static const int kInitialTrackedResourcesCount = 32;
 
     GrMtlCommandBuffer(id<MTLCommandBuffer> cmdBuffer)
         : fCmdBuffer(cmdBuffer)
-        , fPreviousRenderPassDescriptor(nil) {}
+        , fPreviousRenderPassDescriptor(nil)
+        , fHasWork(false) {}
 
     void endAllEncoding();
 
@@ -54,6 +66,9 @@
     id<MTLBlitCommandEncoder>   fActiveBlitCommandEncoder;
     id<MTLRenderCommandEncoder> fActiveRenderCommandEncoder;
     MTLRenderPassDescriptor*    fPreviousRenderPassDescriptor;
+    bool                        fHasWork;
+
+    SkTArray<sk_sp<GrRefCntedCallback>> fFinishedCallbacks;
 
     SkSTArray<kInitialTrackedResourcesCount, sk_sp<const GrBuffer>> fTrackedGrBuffers;
 };
diff --git a/src/gpu/mtl/GrMtlCommandBuffer.mm b/src/gpu/mtl/GrMtlCommandBuffer.mm
index 7c04021..6d8a790 100644
--- a/src/gpu/mtl/GrMtlCommandBuffer.mm
+++ b/src/gpu/mtl/GrMtlCommandBuffer.mm
@@ -30,6 +30,8 @@
 GrMtlCommandBuffer::~GrMtlCommandBuffer() {
     this->endAllEncoding();
     fTrackedGrBuffers.reset();
+    this->callFinishedCallbacks();
+
     fCmdBuffer = nil;
 }
 
@@ -43,6 +45,7 @@
         fActiveBlitCommandEncoder = [fCmdBuffer blitCommandEncoder];
     }
     fPreviousRenderPassDescriptor = nil;
+    fHasWork = true;
 
     return fActiveBlitCommandEncoder;
 }
@@ -88,33 +91,34 @@
         opsRenderPass->initRenderState(fActiveRenderCommandEncoder);
     }
     fPreviousRenderPassDescriptor = descriptor;
+    fHasWork = true;
 
     return fActiveRenderCommandEncoder;
 }
 
-void GrMtlCommandBuffer::commit(bool waitUntilCompleted) {
+bool GrMtlCommandBuffer::commit(bool waitUntilCompleted) {
     this->endAllEncoding();
     [fCmdBuffer commit];
     if (waitUntilCompleted) {
-        [fCmdBuffer waitUntilCompleted];
+        this->waitUntilCompleted();
     }
 
-    if (MTLCommandBufferStatusError == fCmdBuffer.status) {
+    if (fCmdBuffer.status == MTLCommandBufferStatusError) {
         NSString* description = fCmdBuffer.error.localizedDescription;
         const char* errorString = [description UTF8String];
         SkDebugf("Error submitting command buffer: %s\n", errorString);
     }
 
-    fCmdBuffer = nil;
+    return (fCmdBuffer.status != MTLCommandBufferStatusError);
 }
 
 void GrMtlCommandBuffer::endAllEncoding() {
-    if (nil != fActiveRenderCommandEncoder) {
+    if (fActiveRenderCommandEncoder) {
         [fActiveRenderCommandEncoder endEncoding];
         fActiveRenderCommandEncoder = nil;
         fPreviousRenderPassDescriptor = nil;
     }
-    if (nil != fActiveBlitCommandEncoder) {
+    if (fActiveBlitCommandEncoder) {
         [fActiveBlitCommandEncoder endEncoding];
         fActiveBlitCommandEncoder = nil;
     }
@@ -126,6 +130,7 @@
     if (@available(macOS 10.14, iOS 12.0, *)) {
         [fCmdBuffer encodeSignalEvent:event value:eventValue];
     }
+    fHasWork = true;
 }
 
 void GrMtlCommandBuffer::encodeWaitForEvent(id<MTLEvent> event, uint64_t eventValue) {
@@ -135,5 +140,6 @@
     if (@available(macOS 10.14, iOS 12.0, *)) {
         [fCmdBuffer encodeWaitForEvent:event value:eventValue];
     }
+    fHasWork = true;
 }
 
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 6dbc566..f89b079 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -47,7 +47,10 @@
 
     GrMtlResourceProvider& resourceProvider() { return fResourceProvider; }
 
-    GrMtlCommandBuffer* commandBuffer();
+    GrMtlCommandBuffer* commandBuffer() {
+        SkASSERT(fCurrentCmdBuffer);
+        return fCurrentCmdBuffer.get();
+     }
 
     enum SyncQueue {
         kForce_SyncQueue,
@@ -101,7 +104,7 @@
             GrWrapOwnership ownership) override;
     void insertSemaphore(GrSemaphore* semaphore) override;
     void waitSemaphore(GrSemaphore* semaphore) override;
-    void checkFinishProcs() override;
+    void checkFinishProcs() override { this->checkForFinishedCommandBuffers(); }
     std::unique_ptr<GrSemaphore> prepareTextureForCrossContextUsage(GrTexture*) override;
 
     // When the Metal backend actually uses indirect command buffers, this function will actually do
@@ -208,13 +211,14 @@
 
     void addFinishedProc(GrGpuFinishedProc finishedProc,
                          GrGpuFinishedContext finishedContext) override;
+    void addFinishedCallback(sk_sp<GrRefCntedCallback> finishedCallback);
 
     bool onSubmitToGpu(bool syncCpu) override;
 
     // Commits the current command buffer to the queue and then creates a new command buffer. If
     // sync is set to kForce_SyncQueue, the function will wait for all work in the committed
     // command buffer to finish before returning.
-    void submitCommandBuffer(SyncQueue sync);
+    bool submitCommandBuffer(SyncQueue sync);
 
     void checkForFinishedCommandBuffers();
 
@@ -268,8 +272,6 @@
 
     bool fDisconnected;
 
-    GrFinishCallbacks fFinishCallbacks;
-
     typedef GrGpu INHERITED;
 };
 
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index 932c694..4e09f6a 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -126,10 +126,10 @@
         , fOutstandingCommandBuffers(sizeof(OutstandingCommandBuffer), kDefaultOutstandingAllocCnt)
         , fCompiler(new SkSL::Compiler())
         , fResourceProvider(this)
-        , fDisconnected(false)
-        , fFinishCallbacks(this) {
+        , fDisconnected(false) {
     fMtlCaps.reset(new GrMtlCaps(options, fDevice, featureSet));
     fCaps = fMtlCaps;
+    fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
 }
 
 GrMtlGpu::~GrMtlGpu() {
@@ -181,30 +181,44 @@
     delete renderPass;
 }
 
-GrMtlCommandBuffer* GrMtlGpu::commandBuffer() {
-    if (!fCurrentCmdBuffer) {
-        fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
-
-        // This should be done after we have a new command buffer in case the freeing of any
-        // resources held by a finished command buffer causes us to send a new command to the gpu
-        // (like changing the resource state).
-        this->checkForFinishedCommandBuffers();
+bool GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
+    SkASSERT(fCurrentCmdBuffer);
+    if (!fCurrentCmdBuffer->hasWork()) {
+        if (sync == SyncQueue::kForce_SyncQueue) {
+            // wait for the last command buffer we've submitted to finish
+            OutstandingCommandBuffer* back =
+                    (OutstandingCommandBuffer*)fOutstandingCommandBuffers.back();
+            if (back) {
+                back->fCommandBuffer->waitUntilCompleted();
+            }
+            this->checkForFinishedCommandBuffers();
+        }
+        // We need to manually call the finishedCallbacks since we don't add this
+        // to the OutstandingCommandBuffer list
+        fCurrentCmdBuffer->callFinishedCallbacks();
+        return true;
     }
-    return fCurrentCmdBuffer.get();
-}
 
-void GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
-    // TODO: handle sync with empty command buffer
-    if (fCurrentCmdBuffer) {
-        fResourceProvider.addBufferCompletionHandler(fCurrentCmdBuffer.get());
+    fResourceProvider.addBufferCompletionHandler(fCurrentCmdBuffer.get());
 
-        GrFence fence = this->insertFence();
-        new (fOutstandingCommandBuffers.push_back()) OutstandingCommandBuffer(
-                fCurrentCmdBuffer, fence);
+    GrFence fence = this->insertFence();
+    new (fOutstandingCommandBuffers.push_back()) OutstandingCommandBuffer(
+            fCurrentCmdBuffer, fence);
 
-        fCurrentCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
-        fCurrentCmdBuffer.reset();
+    if (!fCurrentCmdBuffer->commit(sync == SyncQueue::kForce_SyncQueue)) {
+        return false;
     }
+
+    // Create a new command buffer for the next submit
+    fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
+
+    // This should be done after we have a new command buffer in case the freeing of any
+    // resources held by a finished command buffer causes us to send a new command to the gpu
+    // (like changing the resource state).
+    this->checkForFinishedCommandBuffers();
+
+    SkASSERT(fCurrentCmdBuffer);
+    return true;
 }
 
 void GrMtlGpu::checkForFinishedCommandBuffers() {
@@ -225,21 +239,33 @@
 
 void GrMtlGpu::addFinishedProc(GrGpuFinishedProc finishedProc,
                                GrGpuFinishedContext finishedContext) {
-    fFinishCallbacks.add(finishedProc, finishedContext);
+    SkASSERT(finishedProc);
+    sk_sp<GrRefCntedCallback> finishedCallback(
+            new GrRefCntedCallback(finishedProc, finishedContext));
+    this->addFinishedCallback(std::move(finishedCallback));
+}
+
+void GrMtlGpu::addFinishedCallback(sk_sp<GrRefCntedCallback> finishedCallback) {
+    SkASSERT(finishedCallback);
+    // Besides the current commandbuffer, we also add the finishedCallback to the newest outstanding
+    // commandbuffer. Our contract for calling the proc is that all previous submitted cmdbuffers
+    // have finished when we call it. However, if our current command buffer has no work when it is
+    // flushed it will drop its ref to the callback immediately. But the previous work may not have
+    // finished. It is safe to only add the proc to the newest outstanding commandbuffer cause that
+    // must finish after all previously submitted command buffers.
+    OutstandingCommandBuffer* back = (OutstandingCommandBuffer*)fOutstandingCommandBuffers.back();
+    if (back) {
+        back->fCommandBuffer->addFinishedCallback(finishedCallback);
+    }
+    fCurrentCmdBuffer->addFinishedCallback(std::move(finishedCallback));
 }
 
 bool GrMtlGpu::onSubmitToGpu(bool syncCpu) {
     if (syncCpu) {
-        this->submitCommandBuffer(kForce_SyncQueue);
-        fFinishCallbacks.callAll(true);
+        return this->submitCommandBuffer(kForce_SyncQueue);
     } else {
-        this->submitCommandBuffer(kSkip_SyncQueue);
+        return this->submitCommandBuffer(kSkip_SyncQueue);
     }
-    return true;
-}
-
-void GrMtlGpu::checkFinishProcs() {
-    fFinishCallbacks.check();
 }
 
 std::unique_ptr<GrSemaphore> GrMtlGpu::prepareTextureForCrossContextUsage(GrTexture*) {
@@ -1002,6 +1028,11 @@
     [transferBuffer didModifyRange: NSMakeRange(0, transferBufferSize)];
 #endif
 
+    // TODO: Add this when we switch over to using the main cmdbuffer
+//    if (finishedCallback) {
+//        this->addFinishedCallback(std::move(finishedCallback));
+//    }
+
     [blitCmdEncoder endEncoding];
     [cmdBuffer commit];
     transferBuffer = nil;