Add a callback to flush so clients know when the GPU has finished its work.

Bug: skia:8802
Change-Id: I093c2a4e879b635b169a849d9af3e9f7a3d84a88
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207870
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 075ac00..a2ca3d1 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -244,18 +244,21 @@
     RETURN_IF_ABANDONED
 
     this->drawingManager()->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                  kNone_GrFlushFlags, 0, nullptr);
+                                  kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 }
 
 GrSemaphoresSubmitted GrContext::flush(GrFlushFlags flags, int numSemaphores,
-                                       GrBackendSemaphore signalSemaphores[]) {
+                                       GrBackendSemaphore signalSemaphores[],
+                                       GrGpuFinishedProc finishedProc,
+                                       GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
     if (this->abandoned()) {
         return GrSemaphoresSubmitted::kNo;
     }
 
     return this->drawingManager()->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                         flags, numSemaphores, signalSemaphores);
+                                         flags, numSemaphores, signalSemaphores, finishedProc,
+                                         finishedContext);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrContextPriv.cpp b/src/gpu/GrContextPriv.cpp
index f545e90..9c1bc97 100644
--- a/src/gpu/GrContextPriv.cpp
+++ b/src/gpu/GrContextPriv.cpp
@@ -195,7 +195,7 @@
     ASSERT_OWNED_PROXY_PRIV(proxy);
 
     fContext->drawingManager()->flush(proxy, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                      kNone_GrFlushFlags, 0, nullptr);
+                                      kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 }
 
 void GrContextPriv::prepareSurfaceForExternalIO(GrSurfaceProxy* proxy) {
@@ -204,7 +204,8 @@
     SkASSERT(proxy);
     ASSERT_OWNED_PROXY_PRIV(proxy);
     fContext->drawingManager()->prepareSurfaceForExternalIO(proxy,
-            SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr);
+            SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+            nullptr, nullptr);
 }
 
 static bool valid_premul_color_type(GrColorType ct) {
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index 3cd1e51..4f559ea 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -210,28 +210,41 @@
                                               SkSurface::BackendSurfaceAccess access,
                                               GrFlushFlags flags,
                                               int numSemaphores,
-                                              GrBackendSemaphore backendSemaphores[]) {
+                                              GrBackendSemaphore backendSemaphores[],
+                                              GrGpuFinishedProc finishedProc,
+                                              GrGpuFinishedContext finishedContext) {
     GR_CREATE_TRACE_MARKER_CONTEXT("GrDrawingManager", "flush", fContext);
 
     if (fFlushing || this->wasAbandoned()) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo;
     }
 
     SkDEBUGCODE(this->validate());
 
-    if (kNone_GrFlushFlags == flags && !numSemaphores && proxy && !fDAG.isUsed(proxy)) {
+    if (kNone_GrFlushFlags == flags && !numSemaphores && !finishedProc &&
+            proxy && !fDAG.isUsed(proxy)) {
         return GrSemaphoresSubmitted::kNo;
     }
 
     auto direct = fContext->priv().asDirectContext();
     if (!direct) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo; // Can't flush while DDL recording
     }
 
     GrGpu* gpu = direct->priv().getGpu();
     if (!gpu) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo; // Can't flush while DDL recording
     }
+
     fFlushing = true;
 
     auto resourceProvider = direct->priv().resourceProvider();
@@ -351,7 +364,8 @@
 #endif
 
     GrSemaphoresSubmitted result = gpu->finishFlush(proxy, access, flags, numSemaphores,
-                                                    backendSemaphores);
+                                                    backendSemaphores, finishedProc,
+                                                    finishedContext);
 
     flushState.deinstantiateProxyTracker()->deinstantiateAllProxies();
 
@@ -442,7 +456,7 @@
         (*numOpListsExecuted)++;
         if (*numOpListsExecuted >= kMaxOpListsBeforeFlush) {
             flushState->gpu()->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                           kNone_GrFlushFlags, 0, nullptr);
+                                           kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
             *numOpListsExecuted = 0;
         }
     }
@@ -460,7 +474,7 @@
         (*numOpListsExecuted)++;
         if (*numOpListsExecuted >= kMaxOpListsBeforeFlush) {
             flushState->gpu()->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                           kNone_GrFlushFlags, 0, nullptr);
+                                           kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
             *numOpListsExecuted = 0;
         }
     }
@@ -480,7 +494,9 @@
 
 GrSemaphoresSubmitted GrDrawingManager::prepareSurfaceForExternalIO(
         GrSurfaceProxy* proxy, SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
-        int numSemaphores, GrBackendSemaphore backendSemaphores[]) {
+        int numSemaphores, GrBackendSemaphore backendSemaphores[],
+        GrGpuFinishedProc finishedProc,
+        GrGpuFinishedContext finishedContext) {
     if (this->wasAbandoned()) {
         return GrSemaphoresSubmitted::kNo;
     }
@@ -501,7 +517,7 @@
     // portion of the DAG required by 'proxy' in order to restore some of the
     // semantics of this method.
     GrSemaphoresSubmitted result = this->flush(proxy, access, flags, numSemaphores,
-                                               backendSemaphores);
+                                               backendSemaphores, finishedProc, finishedContext);
     if (!proxy->isInstantiated()) {
         return result;
     }
@@ -759,7 +775,7 @@
     auto resourceCache = direct->priv().getResourceCache();
     if (resourceCache && resourceCache->requestsFlush()) {
         this->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                    kNone_GrFlushFlags, 0, nullptr);
+                    kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
         resourceCache->purgeAsNeeded();
     }
 }
diff --git a/src/gpu/GrDrawingManager.h b/src/gpu/GrDrawingManager.h
index c927983..ef937fe 100644
--- a/src/gpu/GrDrawingManager.h
+++ b/src/gpu/GrDrawingManager.h
@@ -74,7 +74,9 @@
                                                       SkSurface::BackendSurfaceAccess access,
                                                       GrFlushFlags flags,
                                                       int numSemaphores,
-                                                      GrBackendSemaphore backendSemaphores[]);
+                                                      GrBackendSemaphore backendSemaphores[],
+                                                      GrGpuFinishedProc finishedProc,
+                                                      GrGpuFinishedContext finishedContext);
 
     void addOnFlushCallbackObject(GrOnFlushCallbackObject*);
 
@@ -153,7 +155,9 @@
                                 SkSurface::BackendSurfaceAccess access,
                                 GrFlushFlags flags,
                                 int numSemaphores,
-                                GrBackendSemaphore backendSemaphores[]);
+                                GrBackendSemaphore backendSemaphores[],
+                                GrGpuFinishedProc finishedProc,
+                                GrGpuFinishedContext finishedContext);
 
     SkDEBUGCODE(void validate() const);
 
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index c20b255..df909b6 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -421,7 +421,9 @@
 GrSemaphoresSubmitted GrGpu::finishFlush(GrSurfaceProxy* proxy,
                                          SkSurface::BackendSurfaceAccess access,
                                          GrFlushFlags flags, int numSemaphores,
-                                         GrBackendSemaphore backendSemaphores[]) {
+                                         GrBackendSemaphore backendSemaphores[],
+                                         GrGpuFinishedProc finishedProc,
+                                         GrGpuFinishedContext finishedContext) {
     this->stats()->incNumFinishFlushes();
     GrResourceProvider* resourceProvider = fContext->priv().resourceProvider();
 
@@ -443,7 +445,8 @@
         }
     }
     this->onFinishFlush(proxy, access, flags,
-                        (numSemaphores > 0 && this->caps()->fenceSyncSupport()));
+                        (numSemaphores > 0 && this->caps()->fenceSyncSupport()),
+                        finishedProc, finishedContext);
     return this->caps()->fenceSyncSupport() ? GrSemaphoresSubmitted::kYes
                                             : GrSemaphoresSubmitted::kNo;
 }
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index e44e74c..6b7ab84 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -304,7 +304,9 @@
     // inserted semaphores.
     GrSemaphoresSubmitted finishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
                                       GrFlushFlags flags, int numSemaphores,
-                                      GrBackendSemaphore backendSemaphores[]);
+                                      GrBackendSemaphore backendSemaphores[],
+                                      GrGpuFinishedProc finishedProc,
+                                      GrGpuFinishedContext finishedContext);
 
     virtual void submit(GrGpuCommandBuffer*) = 0;
 
@@ -547,7 +549,9 @@
                                bool canDiscardOutsideDstRect) = 0;
 
     virtual void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                               GrFlushFlags flags, bool insertedSemaphores) = 0;
+                               GrFlushFlags flags, bool insertedSemaphores,
+                               GrGpuFinishedProc finishedProc,
+                               GrGpuFinishedContext finishedContext) = 0;
 
 #ifdef SK_ENABLE_DUMP_GPU
     virtual void onDumpJSON(SkJSONWriter*) const {}
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index a53dd84..47806f8 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -1742,7 +1742,8 @@
 
 GrSemaphoresSubmitted GrRenderTargetContext::prepareForExternalIO(
         SkSurface::BackendSurfaceAccess access, GrFlushFlags flags, int numSemaphores,
-        GrBackendSemaphore backendSemaphores[]) {
+        GrBackendSemaphore backendSemaphores[], GrGpuFinishedProc finishedProc,
+        GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
     if (fContext->priv().abandoned()) {
         return GrSemaphoresSubmitted::kNo;
@@ -1753,7 +1754,9 @@
     return this->drawingManager()->prepareSurfaceForExternalIO(fRenderTargetProxy.get(),
                                                                access, flags,
                                                                numSemaphores,
-                                                               backendSemaphores);
+                                                               backendSemaphores,
+                                                               finishedProc,
+                                                               finishedContext);
 }
 
 bool GrRenderTargetContext::waitOnSemaphores(int numSemaphores,
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index c4ef529..8e42227 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -408,7 +408,9 @@
      */
     GrSemaphoresSubmitted prepareForExternalIO(SkSurface::BackendSurfaceAccess access,
                                                GrFlushFlags flags, int numSemaphores,
-                                               GrBackendSemaphore backendSemaphores[]);
+                                               GrBackendSemaphore backendSemaphores[],
+                                               GrGpuFinishedProc finishedProc,
+                                               GrGpuFinishedContext finishedContext);
 
     /**
      *  The next time this GrRenderTargetContext is flushed, the gpu will wait on the passed in
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 9f05af7..388450f 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1627,18 +1627,19 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkGpuDevice::flush() {
-    this->flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess::kNoAccess,
-                                   kNone_GrFlushFlags, 0, nullptr);
+    this->flush(SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr, nullptr,
+                nullptr);
 }
 
-GrSemaphoresSubmitted SkGpuDevice::flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess access,
-                                                            GrFlushFlags flags,
-                                                            int numSemaphores,
-                                                            GrBackendSemaphore signalSemaphores[]) {
+GrSemaphoresSubmitted SkGpuDevice::flush(SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
+                                         int numSemaphores, GrBackendSemaphore signalSemaphores[],
+                                         GrGpuFinishedProc finishedProc,
+                                         GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
 
     return fRenderTargetContext->prepareForExternalIO(access, flags, numSemaphores,
-                                                      signalSemaphores);
+                                                      signalSemaphores, finishedProc,
+                                                      finishedContext);
 }
 
 bool SkGpuDevice::wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/gpu/SkGpuDevice.h b/src/gpu/SkGpuDevice.h
index e36286b..fca1b27 100644
--- a/src/gpu/SkGpuDevice.h
+++ b/src/gpu/SkGpuDevice.h
@@ -120,10 +120,12 @@
     sk_sp<SkSpecialImage> snapBackImage(const SkIRect&) override;
 
     void flush() override;
-    GrSemaphoresSubmitted flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess access,
-                                                   GrFlushFlags flags,
-                                                   int numSemaphores,
-                                                   GrBackendSemaphore signalSemaphores[]);
+    GrSemaphoresSubmitted flush(SkSurface::BackendSurfaceAccess access,
+                                GrFlushFlags flags,
+                                int numSemaphores,
+                                GrBackendSemaphore signalSemaphores[],
+                                GrGpuFinishedProc finishedProc,
+                                GrGpuFinishedContext finishedContext);
     bool wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores);
 
     bool onAccessPixels(SkPixmap*) override;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 506d06a..e7ef184 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -4307,7 +4307,9 @@
 }
 
 void GrGLGpu::onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                            GrFlushFlags flags, bool insertedSemaphore) {
+                            GrFlushFlags flags, bool insertedSemaphore,
+                            GrGpuFinishedProc finishedProc,
+                            GrGpuFinishedContext finishedContext) {
     // If we inserted semaphores during the flush, we need to call GLFlush.
     if (insertedSemaphore) {
         GL_CALL(Flush());
@@ -4315,6 +4317,10 @@
     if (flags & kSyncCpu_GrFlushFlag) {
         GL_CALL(Finish());
     }
+    // TODO: Defer this until the GPU has actually finished the work; for now we call it immediately.
+    if (finishedProc) {
+        finishedProc(finishedContext);
+    }
 }
 
 void GrGLGpu::submit(GrGpuCommandBuffer* buffer) {
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 9cfd338..86504d8 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -294,7 +294,9 @@
     void flushBlend(const GrXferProcessor::BlendInfo& blendInfo, const GrSwizzle&);
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override;
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override;
 
     bool copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
                            GrSurface* src, GrSurfaceOrigin srcOrigin,
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index 56c40ae..c1e0065 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -111,7 +111,13 @@
     void onResolveRenderTarget(GrRenderTarget* target) override { return; }
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override {}
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
+    }
 
     GrStencilAttachment* createStencilAttachmentForRenderTarget(const GrRenderTarget*,
                                                                 int width,
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 18736eb..5e5c0a9 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -182,11 +182,21 @@
     void onResolveRenderTarget(GrRenderTarget* target) override { return; }
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override {
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override {
         if (flags & kSyncCpu_GrFlushFlag) {
             this->submitCommandBuffer(kForce_SyncQueue);
+            if (finishedProc) {
+                finishedProc(finishedContext);
+            }
         } else {
             this->submitCommandBuffer(kSkip_SyncQueue);
+            // TODO: support finishedProc to actually be called when the GPU is done with the work
+            // and not immediately.
+            if (finishedProc) {
+                finishedProc(finishedContext);
+            }
         }
     }
 
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index fb222af..016b657 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -632,13 +632,15 @@
         }
         SkASSERT(!err);
 
+        fFinishedProcs.reset();
+
         // Destroy the fence
         GR_VK_CALL(gpu->vkInterface(), DestroyFence(gpu->device(), fSubmitFence, nullptr));
         fSubmitFence = VK_NULL_HANDLE;
     }
 }
 
-bool GrVkPrimaryCommandBuffer::finished(const GrVkGpu* gpu) const {
+bool GrVkPrimaryCommandBuffer::finished(const GrVkGpu* gpu) {
     SkASSERT(!fIsActive);
     if (VK_NULL_HANDLE == fSubmitFence) {
         return true;
@@ -647,6 +649,7 @@
     VkResult err = GR_VK_CALL(gpu->vkInterface(), GetFenceStatus(gpu->device(), fSubmitFence));
     switch (err) {
         case VK_SUCCESS:
+            fFinishedProcs.reset();
             return true;
 
         case VK_NOT_READY:
@@ -661,6 +664,10 @@
     return false;
 }
 
+void GrVkPrimaryCommandBuffer::addFinishedProc(sk_sp<GrRefCntedCallback> finishedProc) {
+    fFinishedProcs.push_back(std::move(finishedProc));
+}
+
 void GrVkPrimaryCommandBuffer::onReleaseResources(GrVkGpu* gpu) {
     for (int i = 0; i < fSecondaryCommandBuffers.count(); ++i) {
         fSecondaryCommandBuffers[i]->releaseResources(gpu);
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index 8c91a61..c1c8015 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -313,7 +313,9 @@
     void submitToQueue(const GrVkGpu* gpu, VkQueue queue, GrVkGpu::SyncQueue sync,
                        SkTArray<GrVkSemaphore::Resource*>& signalSemaphores,
                        SkTArray<GrVkSemaphore::Resource*>& waitSemaphores);
-    bool finished(const GrVkGpu* gpu) const;
+    bool finished(const GrVkGpu* gpu);
+
+    void addFinishedProc(sk_sp<GrRefCntedCallback> finishedProc);
 
     void recycleSecondaryCommandBuffers();
 
@@ -336,6 +338,7 @@
 
     SkTArray<GrVkSecondaryCommandBuffer*, true> fSecondaryCommandBuffers;
     VkFence                                     fSubmitFence;
+    SkTArray<sk_sp<GrRefCntedCallback>>         fFinishedProcs;
 
     typedef GrVkCommandBuffer INHERITED;
 };
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index e1032b5..207e5a3 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -316,13 +316,17 @@
     return fCachedTexCommandBuffer.get();
 }
 
-void GrVkGpu::submitCommandBuffer(SyncQueue sync) {
+void GrVkGpu::submitCommandBuffer(SyncQueue sync, GrGpuFinishedProc finishedProc,
+                                  GrGpuFinishedContext finishedContext) {
     SkASSERT(fCurrentCmdBuffer);
 
     if (!fCurrentCmdBuffer->hasWork() && kForce_SyncQueue != sync &&
         !fSemaphoresToSignal.count() && !fSemaphoresToWaitOn.count()) {
         SkASSERT(fDrawables.empty());
         fResourceProvider.checkCommandBuffers();
+        if (finishedProc) {
+            fResourceProvider.addFinishedProcToActiveCommandBuffers(finishedProc, finishedContext);
+        }
         return;
     }
 
@@ -330,6 +334,11 @@
     fCmdPool->close();
     fCurrentCmdBuffer->submitToQueue(this, fQueue, sync, fSemaphoresToSignal, fSemaphoresToWaitOn);
 
+    if (finishedProc) {
+        // Make sure this is called after closing the current command pool
+        fResourceProvider.addFinishedProcToActiveCommandBuffers(finishedProc, finishedContext);
+    }
+
     // We must delete and drawables that have been waitint till submit for us to destroy.
     fDrawables.reset();
 
@@ -1883,7 +1892,8 @@
 }
 
 void GrVkGpu::onFinishFlush(GrSurfaceProxy* proxy, SkSurface::BackendSurfaceAccess access,
-                            GrFlushFlags flags, bool insertedSemaphore) {
+                            GrFlushFlags flags, bool insertedSemaphore,
+                            GrGpuFinishedProc finishedProc, GrGpuFinishedContext finishedContext) {
     // Submit the current command buffer to the Queue. Whether we inserted semaphores or not does
     // not effect what we do here.
     if (proxy && access == SkSurface::BackendSurfaceAccess::kPresent) {
@@ -1899,9 +1909,9 @@
         image->prepareForPresent(this);
     }
     if (flags & kSyncCpu_GrFlushFlag) {
-        this->submitCommandBuffer(kForce_SyncQueue);
+        this->submitCommandBuffer(kForce_SyncQueue, finishedProc, finishedContext);
     } else {
-        this->submitCommandBuffer(kSkip_SyncQueue);
+        this->submitCommandBuffer(kSkip_SyncQueue, finishedProc, finishedContext);
     }
 }
 
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 963016f..1f820b2 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -221,7 +221,8 @@
                        const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override;
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
-                       bool insertedSemaphores) override;
+                       bool insertedSemaphores, GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override;
 
     // Ends and submits the current command buffer to the queue and then creates a new command
     // buffer and begins it. If sync is set to kForce_SyncQueue, the function will wait for all
@@ -229,7 +230,8 @@
     // fSemaphoreToSignal, we will add those signal semaphores to the submission of this command
     // buffer. If this GrVkGpu object has any semaphores in fSemaphoresToWaitOn, we will add those
     // wait semaphores to the submission of this command buffer.
-    void submitCommandBuffer(SyncQueue sync);
+    void submitCommandBuffer(SyncQueue sync, GrGpuFinishedProc finishedProc = nullptr,
+                             GrGpuFinishedContext finishedContext = nullptr);
 
     void internalResolveRenderTarget(GrRenderTarget*, bool requiresSubmit);
 
diff --git a/src/gpu/vk/GrVkResourceProvider.cpp b/src/gpu/vk/GrVkResourceProvider.cpp
index e232e4e..c875c71 100644
--- a/src/gpu/vk/GrVkResourceProvider.cpp
+++ b/src/gpu/vk/GrVkResourceProvider.cpp
@@ -369,6 +369,18 @@
     }
 }
 
+void GrVkResourceProvider::addFinishedProcToActiveCommandBuffers(
+        GrGpuFinishedProc finishedProc, GrGpuFinishedContext finishedContext) {
+    sk_sp<GrRefCntedCallback> procRef(new GrRefCntedCallback(finishedProc, finishedContext));
+    for (int i = 0; i < fActiveCommandPools.count(); ++i) {
+        GrVkCommandPool* pool = fActiveCommandPools[i];
+        if (!pool->isOpen()) {
+            GrVkPrimaryCommandBuffer* buffer = pool->getPrimaryCommandBuffer();
+            buffer->addFinishedProc(procRef);
+        }
+    }
+}
+
 const GrVkResource* GrVkResourceProvider::findOrCreateStandardUniformBufferResource() {
     const GrVkResource* resource = nullptr;
     int count = fAvailableUniformBufferResources.count();
diff --git a/src/gpu/vk/GrVkResourceProvider.h b/src/gpu/vk/GrVkResourceProvider.h
index 57481f9..3cc35f0 100644
--- a/src/gpu/vk/GrVkResourceProvider.h
+++ b/src/gpu/vk/GrVkResourceProvider.h
@@ -92,6 +92,13 @@
 
     void checkCommandBuffers();
 
+    // We must add the finishedProc to all active command buffers since we may have flushed work
+    // that the client cares about before they explicitly called flush and the GPU may reorder
+    // command execution. So we make sure all previously submitted work finishes before we call the
+    // finishedProc.
+    void addFinishedProcToActiveCommandBuffers(GrGpuFinishedProc finishedProc,
+                                               GrGpuFinishedContext finishedContext);
+
     // Finds or creates a compatible GrVkDescriptorPool for the requested type and count.
     // The refcount is incremented and a pointer returned.
     // TODO: Currently this will just create a descriptor pool without holding onto a ref itself
diff --git a/src/image/SkImage_Gpu.cpp b/src/image/SkImage_Gpu.cpp
index 1c67013..bd99208 100644
--- a/src/image/SkImage_Gpu.cpp
+++ b/src/image/SkImage_Gpu.cpp
@@ -655,7 +655,7 @@
     texContext->writePixels(srcInfo, pixmap.addr(0, 0), pixmap.rowBytes(), 0, 0);
 
     drawingManager->flush(proxy.get(), SkSurface::BackendSurfaceAccess::kNoAccess,
-                          kSyncCpu_GrFlushFlag, 0, nullptr);
+                          kSyncCpu_GrFlushFlag, 0, nullptr, nullptr, nullptr);
 
     return image;
 }
diff --git a/src/image/SkSurface.cpp b/src/image/SkSurface.cpp
index d7a05ee..4f36205 100644
--- a/src/image/SkSurface.cpp
+++ b/src/image/SkSurface.cpp
@@ -244,12 +244,16 @@
 }
 
 void SkSurface::flush() {
-    asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr);
+    asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+                        nullptr, nullptr);
 }
 
 GrSemaphoresSubmitted SkSurface::flush(BackendSurfaceAccess access, GrFlushFlags flags,
-                                       int numSemaphores, GrBackendSemaphore signalSemaphores[]) {
-    return asSB(this)->onFlush(access, flags, numSemaphores, signalSemaphores);
+                                       int numSemaphores, GrBackendSemaphore signalSemaphores[],
+                                       GrGpuFinishedProc finishedProc,
+                                       GrGpuFinishedContext finishedContext) {
+    return asSB(this)->onFlush(access, flags, numSemaphores, signalSemaphores, finishedProc,
+                               finishedContext);
 }
 
 GrSemaphoresSubmitted SkSurface::flush(BackendSurfaceAccess access, FlushFlags flags,
@@ -261,7 +265,7 @@
 GrSemaphoresSubmitted SkSurface::flushAndSignalSemaphores(int numSemaphores,
                                                           GrBackendSemaphore signalSemaphores[]) {
     return asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags,
-                               numSemaphores, signalSemaphores);
+                               numSemaphores, signalSemaphores, nullptr, nullptr);
 }
 
 bool SkSurface::wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/image/SkSurface_Base.h b/src/image/SkSurface_Base.h
index 0e3cd95..5b1afcc 100644
--- a/src/image/SkSurface_Base.h
+++ b/src/image/SkSurface_Base.h
@@ -82,7 +82,9 @@
      */
     virtual GrSemaphoresSubmitted onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                           int numSemaphores,
-                                          GrBackendSemaphore signalSemaphores[]) {
+                                          GrBackendSemaphore signalSemaphores[],
+                                          GrGpuFinishedProc finishedProc,
+                                          GrGpuFinishedContext finishedContext) {
         return GrSemaphoresSubmitted::kNo;
     }
 
diff --git a/src/image/SkSurface_Gpu.cpp b/src/image/SkSurface_Gpu.cpp
index c0c4042..fce5cc8 100644
--- a/src/image/SkSurface_Gpu.cpp
+++ b/src/image/SkSurface_Gpu.cpp
@@ -50,8 +50,8 @@
     }
 
     // Grab the render target *after* firing notifications, as it may get switched if CoW kicks in.
-    surface->getDevice()->flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess::kNoAccess,
-                                                   kNone_GrFlushFlags, 0, nullptr);
+    surface->getDevice()->flush(SkSurface::BackendSurfaceAccess::kNoAccess,
+                                kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
     GrRenderTargetContext* rtc = surface->getDevice()->accessRenderTargetContext();
     return rtc->accessRenderTarget();
 }
@@ -160,8 +160,11 @@
 
 GrSemaphoresSubmitted SkSurface_Gpu::onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                              int numSemaphores,
-                                             GrBackendSemaphore signalSemaphores[]) {
-    return fDevice->flushAndSignalSemaphores(access, flags, numSemaphores, signalSemaphores);
+                                             GrBackendSemaphore signalSemaphores[],
+                                             GrGpuFinishedProc finishedProc,
+                                             GrGpuFinishedContext finishedContext) {
+    return fDevice->flush(access, flags, numSemaphores, signalSemaphores, finishedProc,
+                          finishedContext);
 }
 
 bool SkSurface_Gpu::onWait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/image/SkSurface_Gpu.h b/src/image/SkSurface_Gpu.h
index 13b79c6..c708d22 100644
--- a/src/image/SkSurface_Gpu.h
+++ b/src/image/SkSurface_Gpu.h
@@ -34,7 +34,9 @@
     void onDiscard() override;
     GrSemaphoresSubmitted onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                   int numSemaphores,
-                                  GrBackendSemaphore signalSemaphores[]) override;
+                                  GrBackendSemaphore signalSemaphores[],
+                                  GrGpuFinishedProc finishedProc,
+                                  GrGpuFinishedContext finishedContext) override;
     bool onWait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) override;
     bool onCharacterize(SkSurfaceCharacterization*) const override;
     void onDraw(SkCanvas* canvas, SkScalar x, SkScalar y, const SkPaint* paint) override;