Add a callback to flush so clients know when the GPU has finished its work.

Bug: skia:8802
Change-Id: I093c2a4e879b635b169a849d9af3e9f7a3d84a88
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207870
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/gn/tests.gni b/gn/tests.gni
index 6bce617..9436d7c 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -95,6 +95,7 @@
   "$_tests/GrCCPRTest.cpp",
   "$_tests/GrContextAbandonTest.cpp",
   "$_tests/GrContextFactoryTest.cpp",
+  "$_tests/GrFinishedFlushTest.cpp",
   "$_tests/GrGLExtensionsTest.cpp",
   "$_tests/GrMemoryPoolTest.cpp",
   "$_tests/GrMeshTest.cpp",
diff --git a/include/core/SkSurface.h b/include/core/SkSurface.h
index 48ec1f6..d189ea1 100644
--- a/include/core/SkSurface.h
+++ b/include/core/SkSurface.h
@@ -746,18 +746,26 @@
 
         Pending surface commands are flushed regardless of the return result.
 
+        If a finishedProc is provided, the finishedProc will be called when all work submitted to
+        the GPU from this flush call and all previous flush calls has finished on the GPU. If the
+        flush call fails due to an error and nothing ends up getting sent to the GPU, the finished
+        proc is called immediately.
+
         @param access            type of access the call will do on the backend object after flush
         @param flags             flush options
         @param numSemaphores     size of signalSemaphores array
         @param signalSemaphores  array of semaphore containers
+        @param finishedProc      proc called after GPU work from flush has finished
+        @param finishedContext   context passed into call to finishedProc
         @return                  one of: GrSemaphoresSubmitted::kYes, GrSemaphoresSubmitted::kNo
     */
     GrSemaphoresSubmitted flush(BackendSurfaceAccess access, GrFlushFlags flags,
-                                int numSemaphores, GrBackendSemaphore signalSemaphores[]);
+                                int numSemaphores, GrBackendSemaphore signalSemaphores[],
+                                GrGpuFinishedProc finishedProc = nullptr,
+                                GrGpuFinishedContext finishedContext = nullptr);
 
     /** The below enum and flush call are deprected
      */
-
     enum FlushFlags {
         kNone_FlushFlags = 0,
         // flush will wait till all submitted GPU work is finished before returning.
diff --git a/include/gpu/GrContext.h b/include/gpu/GrContext.h
index 4869746..c407830 100644
--- a/include/gpu/GrContext.h
+++ b/include/gpu/GrContext.h
@@ -260,16 +260,23 @@
      * themselves can be deleted as soon as this function returns.
      *
      * If the backend API is OpenGL only uninitialized GrBackendSemaphores are supported.
-     * If the backend API is Vulkan either initialized or unitialized semaphores are supported.
-     * If unitialized, the semaphores which are created will be valid for use only with the VkDevice
-     * with which they were created.
+     * If the backend API is Vulkan either initialized or uninitialized semaphores are supported.
+     * If uninitialized, the semaphores which are created will be valid for use only with the
+     * VkDevice with which they were created.
      *
-     * If this call returns GrSemaphoresSubmited::kNo, the GPU backend will not have created or
+     * If this call returns GrSemaphoresSubmitted::kNo, the GPU backend will not have created or
      * added any semaphores to signal on the GPU. Thus the client should not have the GPU wait on
      * any of the semaphores. However, any pending commands to the context will still be flushed.
+     *
+     * If a finishedProc is provided, the finishedProc will be called when all work submitted to the
+     * GPU from this flush call and all previous flush calls has finished on the GPU. If the flush
+     * call fails due to an error and nothing ends up getting sent to the GPU, the finished proc is
+     * called immediately.
      */
     GrSemaphoresSubmitted flush(GrFlushFlags flags, int numSemaphores,
-                                GrBackendSemaphore signalSemaphores[]);
+                                GrBackendSemaphore signalSemaphores[],
+                                GrGpuFinishedProc finishedProc = nullptr,
+                                GrGpuFinishedContext finishedContext = nullptr);
 
     /**
      * Deprecated.
diff --git a/include/gpu/GrTypes.h b/include/gpu/GrTypes.h
index 1d9ae7c..74ad506 100644
--- a/include/gpu/GrTypes.h
+++ b/include/gpu/GrTypes.h
@@ -265,6 +265,9 @@
     kSyncCpu_GrFlushFlag = 0x1,
 };
 
+typedef void* GrGpuFinishedContext;
+typedef void (*GrGpuFinishedProc)(GrGpuFinishedContext finishedContext);
+
 /**
  * Enum used as return value when flush with semaphores so the client knows whether the semaphores
  * were submitted to GPU or not.
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 075ac00..a2ca3d1 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -244,18 +244,21 @@
     RETURN_IF_ABANDONED
 
     this->drawingManager()->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                  kNone_GrFlushFlags, 0, nullptr);
+                                  kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 }
 
 GrSemaphoresSubmitted GrContext::flush(GrFlushFlags flags, int numSemaphores,
-                                       GrBackendSemaphore signalSemaphores[]) {
+                                       GrBackendSemaphore signalSemaphores[],
+                                       GrGpuFinishedProc finishedProc,
+                                       GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
     if (this->abandoned()) {
         return GrSemaphoresSubmitted::kNo;
     }
 
     return this->drawingManager()->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                         flags, numSemaphores, signalSemaphores);
+                                         flags, numSemaphores, signalSemaphores, finishedProc,
+                                         finishedContext);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrContextPriv.cpp b/src/gpu/GrContextPriv.cpp
index f545e90..9c1bc97 100644
--- a/src/gpu/GrContextPriv.cpp
+++ b/src/gpu/GrContextPriv.cpp
@@ -195,7 +195,7 @@
     ASSERT_OWNED_PROXY_PRIV(proxy);
 
     fContext->drawingManager()->flush(proxy, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                      kNone_GrFlushFlags, 0, nullptr);
+                                      kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 }
 
 void GrContextPriv::prepareSurfaceForExternalIO(GrSurfaceProxy* proxy) {
@@ -204,7 +204,8 @@
     SkASSERT(proxy);
     ASSERT_OWNED_PROXY_PRIV(proxy);
     fContext->drawingManager()->prepareSurfaceForExternalIO(proxy,
-            SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr);
+            SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+            nullptr, nullptr);
 }
 
 static bool valid_premul_color_type(GrColorType ct) {
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index 3cd1e51..4f559ea 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -210,28 +210,41 @@
                                               SkSurface::BackendSurfaceAccess access,
                                               GrFlushFlags flags,
                                               int numSemaphores,
-                                              GrBackendSemaphore backendSemaphores[]) {
+                                              GrBackendSemaphore backendSemaphores[],
+                                              GrGpuFinishedProc finishedProc,
+                                              GrGpuFinishedContext finishedContext) {
     GR_CREATE_TRACE_MARKER_CONTEXT("GrDrawingManager", "flush", fContext);
 
     if (fFlushing || this->wasAbandoned()) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo;
     }
 
     SkDEBUGCODE(this->validate());
 
-    if (kNone_GrFlushFlags == flags && !numSemaphores && proxy && !fDAG.isUsed(proxy)) {
+    if (kNone_GrFlushFlags == flags && !numSemaphores && !finishedProc &&
+            proxy && !fDAG.isUsed(proxy)) {
         return GrSemaphoresSubmitted::kNo;
     }
 
     auto direct = fContext->priv().asDirectContext();
     if (!direct) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo; // Can't flush while DDL recording
     }
 
     GrGpu* gpu = direct->priv().getGpu();
     if (!gpu) {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
         return GrSemaphoresSubmitted::kNo; // Can't flush while DDL recording
     }
+
     fFlushing = true;
 
     auto resourceProvider = direct->priv().resourceProvider();
@@ -351,7 +364,8 @@
 #endif
 
     GrSemaphoresSubmitted result = gpu->finishFlush(proxy, access, flags, numSemaphores,
-                                                    backendSemaphores);
+                                                    backendSemaphores, finishedProc,
+                                                    finishedContext);
 
     flushState.deinstantiateProxyTracker()->deinstantiateAllProxies();
 
@@ -442,7 +456,7 @@
         (*numOpListsExecuted)++;
         if (*numOpListsExecuted >= kMaxOpListsBeforeFlush) {
             flushState->gpu()->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                           kNone_GrFlushFlags, 0, nullptr);
+                                           kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
             *numOpListsExecuted = 0;
         }
     }
@@ -460,7 +474,7 @@
         (*numOpListsExecuted)++;
         if (*numOpListsExecuted >= kMaxOpListsBeforeFlush) {
             flushState->gpu()->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                           kNone_GrFlushFlags, 0, nullptr);
+                                           kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
             *numOpListsExecuted = 0;
         }
     }
@@ -480,7 +494,9 @@
 
 GrSemaphoresSubmitted GrDrawingManager::prepareSurfaceForExternalIO(
         GrSurfaceProxy* proxy, SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
-        int numSemaphores, GrBackendSemaphore backendSemaphores[]) {
+        int numSemaphores, GrBackendSemaphore backendSemaphores[],
+        GrGpuFinishedProc finishedProc,
+        GrGpuFinishedContext finishedContext) {
     if (this->wasAbandoned()) {
         return GrSemaphoresSubmitted::kNo;
     }
@@ -501,7 +517,7 @@
     // portion of the DAG required by 'proxy' in order to restore some of the
     // semantics of this method.
     GrSemaphoresSubmitted result = this->flush(proxy, access, flags, numSemaphores,
-                                               backendSemaphores);
+                                               backendSemaphores, finishedProc, finishedContext);
     if (!proxy->isInstantiated()) {
         return result;
     }
@@ -759,7 +775,7 @@
     auto resourceCache = direct->priv().getResourceCache();
     if (resourceCache && resourceCache->requestsFlush()) {
         this->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                    kNone_GrFlushFlags, 0, nullptr);
+                    kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
         resourceCache->purgeAsNeeded();
     }
 }
diff --git a/src/gpu/GrDrawingManager.h b/src/gpu/GrDrawingManager.h
index c927983..ef937fe 100644
--- a/src/gpu/GrDrawingManager.h
+++ b/src/gpu/GrDrawingManager.h
@@ -74,7 +74,9 @@
                                                       SkSurface::BackendSurfaceAccess access,
                                                       GrFlushFlags flags,
                                                       int numSemaphores,
-                                                      GrBackendSemaphore backendSemaphores[]);
+                                                      GrBackendSemaphore backendSemaphores[],
+                                                      GrGpuFinishedProc finishedProc,
+                                                      GrGpuFinishedContext finishedContext);
 
     void addOnFlushCallbackObject(GrOnFlushCallbackObject*);
 
@@ -153,7 +155,9 @@
                                 SkSurface::BackendSurfaceAccess access,
                                 GrFlushFlags flags,
                                 int numSemaphores,
-                                GrBackendSemaphore backendSemaphores[]);
+                                GrBackendSemaphore backendSemaphores[],
+                                GrGpuFinishedProc finishedProc,
+                                GrGpuFinishedContext finishedContext);
 
     SkDEBUGCODE(void validate() const);
 
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index c20b255..df909b6 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -421,7 +421,9 @@
 GrSemaphoresSubmitted GrGpu::finishFlush(GrSurfaceProxy* proxy,
                                          SkSurface::BackendSurfaceAccess access,
                                          GrFlushFlags flags, int numSemaphores,
-                                         GrBackendSemaphore backendSemaphores[]) {
+                                         GrBackendSemaphore backendSemaphores[],
+                                         GrGpuFinishedProc finishedProc,
+                                         GrGpuFinishedContext finishedContext) {
     this->stats()->incNumFinishFlushes();
     GrResourceProvider* resourceProvider = fContext->priv().resourceProvider();
 
@@ -443,7 +445,8 @@
         }
     }
     this->onFinishFlush(proxy, access, flags,
-                        (numSemaphores > 0 && this->caps()->fenceSyncSupport()));
+                        (numSemaphores > 0 && this->caps()->fenceSyncSupport()),
+                        finishedProc, finishedContext);
     return this->caps()->fenceSyncSupport() ? GrSemaphoresSubmitted::kYes
                                             : GrSemaphoresSubmitted::kNo;
 }
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index e44e74c..6b7ab84 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -304,7 +304,9 @@
     // inserted semaphores.
     GrSemaphoresSubmitted finishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
                                       GrFlushFlags flags, int numSemaphores,
-                                      GrBackendSemaphore backendSemaphores[]);
+                                      GrBackendSemaphore backendSemaphores[],
+                                      GrGpuFinishedProc finishedProc,
+                                      GrGpuFinishedContext finishedContext);
 
     virtual void submit(GrGpuCommandBuffer*) = 0;
 
@@ -547,7 +549,9 @@
                                bool canDiscardOutsideDstRect) = 0;
 
     virtual void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                               GrFlushFlags flags, bool insertedSemaphores) = 0;
+                               GrFlushFlags flags, bool insertedSemaphores,
+                               GrGpuFinishedProc finishedProc,
+                               GrGpuFinishedContext finishedContext) = 0;
 
 #ifdef SK_ENABLE_DUMP_GPU
     virtual void onDumpJSON(SkJSONWriter*) const {}
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index a53dd84..47806f8 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -1742,7 +1742,8 @@
 
 GrSemaphoresSubmitted GrRenderTargetContext::prepareForExternalIO(
         SkSurface::BackendSurfaceAccess access, GrFlushFlags flags, int numSemaphores,
-        GrBackendSemaphore backendSemaphores[]) {
+        GrBackendSemaphore backendSemaphores[], GrGpuFinishedProc finishedProc,
+        GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
     if (fContext->priv().abandoned()) {
         return GrSemaphoresSubmitted::kNo;
@@ -1753,7 +1754,9 @@
     return this->drawingManager()->prepareSurfaceForExternalIO(fRenderTargetProxy.get(),
                                                                access, flags,
                                                                numSemaphores,
-                                                               backendSemaphores);
+                                                               backendSemaphores,
+                                                               finishedProc,
+                                                               finishedContext);
 }
 
 bool GrRenderTargetContext::waitOnSemaphores(int numSemaphores,
diff --git a/src/gpu/GrRenderTargetContext.h b/src/gpu/GrRenderTargetContext.h
index c4ef529..8e42227 100644
--- a/src/gpu/GrRenderTargetContext.h
+++ b/src/gpu/GrRenderTargetContext.h
@@ -408,7 +408,9 @@
      */
     GrSemaphoresSubmitted prepareForExternalIO(SkSurface::BackendSurfaceAccess access,
                                                GrFlushFlags flags, int numSemaphores,
-                                               GrBackendSemaphore backendSemaphores[]);
+                                               GrBackendSemaphore backendSemaphores[],
+                                               GrGpuFinishedProc finishedProc,
+                                               GrGpuFinishedContext finishedContext);
 
     /**
      *  The next time this GrRenderTargetContext is flushed, the gpu will wait on the passed in
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 9f05af7..388450f 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1627,18 +1627,19 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkGpuDevice::flush() {
-    this->flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess::kNoAccess,
-                                   kNone_GrFlushFlags, 0, nullptr);
+    this->flush(SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr, nullptr,
+                nullptr);
 }
 
-GrSemaphoresSubmitted SkGpuDevice::flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess access,
-                                                            GrFlushFlags flags,
-                                                            int numSemaphores,
-                                                            GrBackendSemaphore signalSemaphores[]) {
+GrSemaphoresSubmitted SkGpuDevice::flush(SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
+                                         int numSemaphores, GrBackendSemaphore signalSemaphores[],
+                                         GrGpuFinishedProc finishedProc,
+                                         GrGpuFinishedContext finishedContext) {
     ASSERT_SINGLE_OWNER
 
     return fRenderTargetContext->prepareForExternalIO(access, flags, numSemaphores,
-                                                      signalSemaphores);
+                                                      signalSemaphores, finishedProc,
+                                                      finishedContext);
 }
 
 bool SkGpuDevice::wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/gpu/SkGpuDevice.h b/src/gpu/SkGpuDevice.h
index e36286b..fca1b27 100644
--- a/src/gpu/SkGpuDevice.h
+++ b/src/gpu/SkGpuDevice.h
@@ -120,10 +120,12 @@
     sk_sp<SkSpecialImage> snapBackImage(const SkIRect&) override;
 
     void flush() override;
-    GrSemaphoresSubmitted flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess access,
-                                                   GrFlushFlags flags,
-                                                   int numSemaphores,
-                                                   GrBackendSemaphore signalSemaphores[]);
+    GrSemaphoresSubmitted flush(SkSurface::BackendSurfaceAccess access,
+                                GrFlushFlags flags,
+                                int numSemaphores,
+                                GrBackendSemaphore signalSemaphores[],
+                                GrGpuFinishedProc finishedProc,
+                                GrGpuFinishedContext finishedContext);
     bool wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores);
 
     bool onAccessPixels(SkPixmap*) override;
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 506d06a..e7ef184 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -4307,7 +4307,9 @@
 }
 
 void GrGLGpu::onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                            GrFlushFlags flags, bool insertedSemaphore) {
+                            GrFlushFlags flags, bool insertedSemaphore,
+                            GrGpuFinishedProc finishedProc,
+                            GrGpuFinishedContext finishedContext) {
     // If we inserted semaphores during the flush, we need to call GLFlush.
     if (insertedSemaphore) {
         GL_CALL(Flush());
@@ -4315,6 +4317,10 @@
     if (flags & kSyncCpu_GrFlushFlag) {
         GL_CALL(Finish());
     }
+    // TODO: We should have GL actually wait until the GPU has finished this work.
+    if (finishedProc) {
+        finishedProc(finishedContext);
+    }
 }
 
 void GrGLGpu::submit(GrGpuCommandBuffer* buffer) {
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 9cfd338..86504d8 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -294,7 +294,9 @@
     void flushBlend(const GrXferProcessor::BlendInfo& blendInfo, const GrSwizzle&);
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override;
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override;
 
     bool copySurfaceAsDraw(GrSurface* dst, GrSurfaceOrigin dstOrigin,
                            GrSurface* src, GrSurfaceOrigin srcOrigin,
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index 56c40ae..c1e0065 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -111,7 +111,13 @@
     void onResolveRenderTarget(GrRenderTarget* target) override { return; }
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override {}
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override {
+        if (finishedProc) {
+            finishedProc(finishedContext);
+        }
+    }
 
     GrStencilAttachment* createStencilAttachmentForRenderTarget(const GrRenderTarget*,
                                                                 int width,
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 18736eb..5e5c0a9 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -182,11 +182,21 @@
     void onResolveRenderTarget(GrRenderTarget* target) override { return; }
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access,
-                       GrFlushFlags flags, bool insertedSemaphores) override {
+                       GrFlushFlags flags, bool insertedSemaphores,
+                       GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override {
         if (flags & kSyncCpu_GrFlushFlag) {
             this->submitCommandBuffer(kForce_SyncQueue);
+            if (finishedProc) {
+                finishedProc(finishedContext);
+            }
         } else {
             this->submitCommandBuffer(kSkip_SyncQueue);
+            // TODO: Call finishedProc only once the GPU is done with the work instead of calling
+            // it immediately.
+            if (finishedProc) {
+                finishedProc(finishedContext);
+            }
         }
     }
 
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index fb222af..016b657 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -632,13 +632,15 @@
         }
         SkASSERT(!err);
 
+        fFinishedProcs.reset();
+
         // Destroy the fence
         GR_VK_CALL(gpu->vkInterface(), DestroyFence(gpu->device(), fSubmitFence, nullptr));
         fSubmitFence = VK_NULL_HANDLE;
     }
 }
 
-bool GrVkPrimaryCommandBuffer::finished(const GrVkGpu* gpu) const {
+bool GrVkPrimaryCommandBuffer::finished(const GrVkGpu* gpu) {
     SkASSERT(!fIsActive);
     if (VK_NULL_HANDLE == fSubmitFence) {
         return true;
@@ -647,6 +649,7 @@
     VkResult err = GR_VK_CALL(gpu->vkInterface(), GetFenceStatus(gpu->device(), fSubmitFence));
     switch (err) {
         case VK_SUCCESS:
+            fFinishedProcs.reset();
             return true;
 
         case VK_NOT_READY:
@@ -661,6 +664,10 @@
     return false;
 }
 
+void GrVkPrimaryCommandBuffer::addFinishedProc(sk_sp<GrRefCntedCallback> finishedProc) {
+    fFinishedProcs.push_back(std::move(finishedProc));
+}
+
 void GrVkPrimaryCommandBuffer::onReleaseResources(GrVkGpu* gpu) {
     for (int i = 0; i < fSecondaryCommandBuffers.count(); ++i) {
         fSecondaryCommandBuffers[i]->releaseResources(gpu);
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index 8c91a61..c1c8015 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -313,7 +313,9 @@
     void submitToQueue(const GrVkGpu* gpu, VkQueue queue, GrVkGpu::SyncQueue sync,
                        SkTArray<GrVkSemaphore::Resource*>& signalSemaphores,
                        SkTArray<GrVkSemaphore::Resource*>& waitSemaphores);
-    bool finished(const GrVkGpu* gpu) const;
+    bool finished(const GrVkGpu* gpu);
+
+    void addFinishedProc(sk_sp<GrRefCntedCallback> finishedProc);
 
     void recycleSecondaryCommandBuffers();
 
@@ -336,6 +338,7 @@
 
     SkTArray<GrVkSecondaryCommandBuffer*, true> fSecondaryCommandBuffers;
     VkFence                                     fSubmitFence;
+    SkTArray<sk_sp<GrRefCntedCallback>>         fFinishedProcs;
 
     typedef GrVkCommandBuffer INHERITED;
 };
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index e1032b5..207e5a3 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -316,13 +316,17 @@
     return fCachedTexCommandBuffer.get();
 }
 
-void GrVkGpu::submitCommandBuffer(SyncQueue sync) {
+void GrVkGpu::submitCommandBuffer(SyncQueue sync, GrGpuFinishedProc finishedProc,
+                                  GrGpuFinishedContext finishedContext) {
     SkASSERT(fCurrentCmdBuffer);
 
     if (!fCurrentCmdBuffer->hasWork() && kForce_SyncQueue != sync &&
         !fSemaphoresToSignal.count() && !fSemaphoresToWaitOn.count()) {
         SkASSERT(fDrawables.empty());
         fResourceProvider.checkCommandBuffers();
+        if (finishedProc) {
+            fResourceProvider.addFinishedProcToActiveCommandBuffers(finishedProc, finishedContext);
+        }
         return;
     }
 
@@ -330,6 +334,11 @@
     fCmdPool->close();
     fCurrentCmdBuffer->submitToQueue(this, fQueue, sync, fSemaphoresToSignal, fSemaphoresToWaitOn);
 
+    if (finishedProc) {
+        // Must run after the pool is closed: the proc is only attached to non-open pools.
+        fResourceProvider.addFinishedProcToActiveCommandBuffers(finishedProc, finishedContext);
+    }
+
     // We must delete and drawables that have been waitint till submit for us to destroy.
     fDrawables.reset();
 
@@ -1883,7 +1892,8 @@
 }
 
 void GrVkGpu::onFinishFlush(GrSurfaceProxy* proxy, SkSurface::BackendSurfaceAccess access,
-                            GrFlushFlags flags, bool insertedSemaphore) {
+                            GrFlushFlags flags, bool insertedSemaphore,
+                            GrGpuFinishedProc finishedProc, GrGpuFinishedContext finishedContext) {
     // Submit the current command buffer to the Queue. Whether we inserted semaphores or not does
     // not effect what we do here.
     if (proxy && access == SkSurface::BackendSurfaceAccess::kPresent) {
@@ -1899,9 +1909,9 @@
         image->prepareForPresent(this);
     }
     if (flags & kSyncCpu_GrFlushFlag) {
-        this->submitCommandBuffer(kForce_SyncQueue);
+        this->submitCommandBuffer(kForce_SyncQueue, finishedProc, finishedContext);
     } else {
-        this->submitCommandBuffer(kSkip_SyncQueue);
+        this->submitCommandBuffer(kSkip_SyncQueue, finishedProc, finishedContext);
     }
 }
 
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 963016f..1f820b2 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -221,7 +221,8 @@
                        const SkIPoint& dstPoint, bool canDiscardOutsideDstRect) override;
 
     void onFinishFlush(GrSurfaceProxy*, SkSurface::BackendSurfaceAccess access, GrFlushFlags flags,
-                       bool insertedSemaphores) override;
+                       bool insertedSemaphores, GrGpuFinishedProc finishedProc,
+                       GrGpuFinishedContext finishedContext) override;
 
     // Ends and submits the current command buffer to the queue and then creates a new command
     // buffer and begins it. If sync is set to kForce_SyncQueue, the function will wait for all
@@ -229,7 +230,8 @@
     // fSemaphoreToSignal, we will add those signal semaphores to the submission of this command
     // buffer. If this GrVkGpu object has any semaphores in fSemaphoresToWaitOn, we will add those
     // wait semaphores to the submission of this command buffer.
-    void submitCommandBuffer(SyncQueue sync);
+    void submitCommandBuffer(SyncQueue sync, GrGpuFinishedProc finishedProc = nullptr,
+                             GrGpuFinishedContext finishedContext = nullptr);
 
     void internalResolveRenderTarget(GrRenderTarget*, bool requiresSubmit);
 
diff --git a/src/gpu/vk/GrVkResourceProvider.cpp b/src/gpu/vk/GrVkResourceProvider.cpp
index e232e4e..c875c71 100644
--- a/src/gpu/vk/GrVkResourceProvider.cpp
+++ b/src/gpu/vk/GrVkResourceProvider.cpp
@@ -369,6 +369,18 @@
     }
 }
 
+void GrVkResourceProvider::addFinishedProcToActiveCommandBuffers(
+        GrGpuFinishedProc finishedProc, GrGpuFinishedContext finishedContext) {
+    sk_sp<GrRefCntedCallback> procRef(new GrRefCntedCallback(finishedProc, finishedContext));
+    for (int i = 0; i < fActiveCommandPools.count(); ++i) {
+        GrVkCommandPool* pool = fActiveCommandPools[i];
+        if (!pool->isOpen()) {
+            GrVkPrimaryCommandBuffer* buffer = pool->getPrimaryCommandBuffer();
+            buffer->addFinishedProc(procRef);
+        }
+    }
+}
+
 const GrVkResource* GrVkResourceProvider::findOrCreateStandardUniformBufferResource() {
     const GrVkResource* resource = nullptr;
     int count = fAvailableUniformBufferResources.count();
diff --git a/src/gpu/vk/GrVkResourceProvider.h b/src/gpu/vk/GrVkResourceProvider.h
index 57481f9..3cc35f0 100644
--- a/src/gpu/vk/GrVkResourceProvider.h
+++ b/src/gpu/vk/GrVkResourceProvider.h
@@ -92,6 +92,13 @@
 
     void checkCommandBuffers();
 
+    // We must add the finishedProc to all active command buffers since we may have flushed work
+    // that the client cares about before they explicitly called flush and the GPU may reorder
+    // command execution. So we make sure all previously submitted work finishes before we call the
+    // finishedProc.
+    void addFinishedProcToActiveCommandBuffers(GrGpuFinishedProc finishedProc,
+                                               GrGpuFinishedContext finishedContext);
+
     // Finds or creates a compatible GrVkDescriptorPool for the requested type and count.
     // The refcount is incremented and a pointer returned.
     // TODO: Currently this will just create a descriptor pool without holding onto a ref itself
diff --git a/src/image/SkImage_Gpu.cpp b/src/image/SkImage_Gpu.cpp
index 1c67013..bd99208 100644
--- a/src/image/SkImage_Gpu.cpp
+++ b/src/image/SkImage_Gpu.cpp
@@ -655,7 +655,7 @@
     texContext->writePixels(srcInfo, pixmap.addr(0, 0), pixmap.rowBytes(), 0, 0);
 
     drawingManager->flush(proxy.get(), SkSurface::BackendSurfaceAccess::kNoAccess,
-                          kSyncCpu_GrFlushFlag, 0, nullptr);
+                          kSyncCpu_GrFlushFlag, 0, nullptr, nullptr, nullptr);
 
     return image;
 }
diff --git a/src/image/SkSurface.cpp b/src/image/SkSurface.cpp
index d7a05ee..4f36205 100644
--- a/src/image/SkSurface.cpp
+++ b/src/image/SkSurface.cpp
@@ -244,12 +244,16 @@
 }
 
 void SkSurface::flush() {
-    asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr);
+    asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+                        nullptr, nullptr);  // no finishedProc, no finishedContext
 }
 
 GrSemaphoresSubmitted SkSurface::flush(BackendSurfaceAccess access, GrFlushFlags flags,
-                                       int numSemaphores, GrBackendSemaphore signalSemaphores[]) {
-    return asSB(this)->onFlush(access, flags, numSemaphores, signalSemaphores);
+                                       int numSemaphores, GrBackendSemaphore signalSemaphores[],
+                                       GrGpuFinishedProc finishedProc,  // optional callback
+                                       GrGpuFinishedContext finishedContext) {
+    return asSB(this)->onFlush(access, flags, numSemaphores, signalSemaphores, finishedProc,
+                               finishedContext);  // proc and context forwarded unchanged
 }
 
 GrSemaphoresSubmitted SkSurface::flush(BackendSurfaceAccess access, FlushFlags flags,
@@ -261,7 +265,7 @@
 GrSemaphoresSubmitted SkSurface::flushAndSignalSemaphores(int numSemaphores,
                                                           GrBackendSemaphore signalSemaphores[]) {
     return asSB(this)->onFlush(BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags,
-                               numSemaphores, signalSemaphores);
+                               numSemaphores, signalSemaphores, nullptr, nullptr);  // no callback
 }
 
 bool SkSurface::wait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/image/SkSurface_Base.h b/src/image/SkSurface_Base.h
index 0e3cd95..5b1afcc 100644
--- a/src/image/SkSurface_Base.h
+++ b/src/image/SkSurface_Base.h
@@ -82,7 +82,9 @@
      */
     virtual GrSemaphoresSubmitted onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                           int numSemaphores,
-                                          GrBackendSemaphore signalSemaphores[]) {
+                                          GrBackendSemaphore signalSemaphores[],
+                                          GrGpuFinishedProc finishedProc,  // unused by this default
+                                          GrGpuFinishedContext finishedContext) {
         return GrSemaphoresSubmitted::kNo;
     }
 
diff --git a/src/image/SkSurface_Gpu.cpp b/src/image/SkSurface_Gpu.cpp
index c0c4042..fce5cc8 100644
--- a/src/image/SkSurface_Gpu.cpp
+++ b/src/image/SkSurface_Gpu.cpp
@@ -50,8 +50,8 @@
     }
 
     // Grab the render target *after* firing notifications, as it may get switched if CoW kicks in.
-    surface->getDevice()->flushAndSignalSemaphores(SkSurface::BackendSurfaceAccess::kNoAccess,
-                                                   kNone_GrFlushFlags, 0, nullptr);
+    surface->getDevice()->flush(SkSurface::BackendSurfaceAccess::kNoAccess,
+                                kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
     GrRenderTargetContext* rtc = surface->getDevice()->accessRenderTargetContext();
     return rtc->accessRenderTarget();
 }
@@ -160,8 +160,11 @@
 
 GrSemaphoresSubmitted SkSurface_Gpu::onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                              int numSemaphores,
-                                             GrBackendSemaphore signalSemaphores[]) {
-    return fDevice->flushAndSignalSemaphores(access, flags, numSemaphores, signalSemaphores);
+                                             GrBackendSemaphore signalSemaphores[],
+                                             GrGpuFinishedProc finishedProc,
+                                             GrGpuFinishedContext finishedContext) {
+    return fDevice->flush(access, flags, numSemaphores, signalSemaphores, finishedProc,
+                          finishedContext);  // every argument is forwarded to the device
 }
 
 bool SkSurface_Gpu::onWait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) {
diff --git a/src/image/SkSurface_Gpu.h b/src/image/SkSurface_Gpu.h
index 13b79c6..c708d22 100644
--- a/src/image/SkSurface_Gpu.h
+++ b/src/image/SkSurface_Gpu.h
@@ -34,7 +34,9 @@
     void onDiscard() override;
     GrSemaphoresSubmitted onFlush(BackendSurfaceAccess access, GrFlushFlags flags,
                                   int numSemaphores,
-                                  GrBackendSemaphore signalSemaphores[]) override;
+                                  GrBackendSemaphore signalSemaphores[],
+                                  GrGpuFinishedProc finishedProc,
+                                  GrGpuFinishedContext finishedContext) override;
     bool onWait(int numSemaphores, const GrBackendSemaphore* waitSemaphores) override;
     bool onCharacterize(SkSurfaceCharacterization*) const override;
     void onDraw(SkCanvas* canvas, SkScalar x, SkScalar y, const SkPaint* paint) override;
diff --git a/tests/DefaultPathRendererTest.cpp b/tests/DefaultPathRendererTest.cpp
index 7475729..8b13ba6 100644
--- a/tests/DefaultPathRendererTest.cpp
+++ b/tests/DefaultPathRendererTest.cpp
@@ -100,7 +100,7 @@
                       SkMatrix::I(), invPath, style);
 
         rtc->prepareForExternalIO(SkSurface::BackendSurfaceAccess::kNoAccess,
-                                  kNone_GrFlushFlags, 0, nullptr);
+                                  kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
     }
 
     {
diff --git a/tests/GLProgramsTest.cpp b/tests/GLProgramsTest.cpp
index 78d3749..9cdd113 100644
--- a/tests/GLProgramsTest.cpp
+++ b/tests/GLProgramsTest.cpp
@@ -314,7 +314,7 @@
     }
     // Flush everything, test passes if flush is successful(ie, no asserts are hit, no crashes)
     drawingManager->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                          kNone_GrFlushFlags, 0, nullptr);
+                          kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 
     const GrBackendFormat format =
             context->priv().caps()->getBackendFormatFromColorType(kRGBA_8888_SkColorType);
@@ -344,7 +344,7 @@
             paint.addColorFragmentProcessor(std::move(blockFP));
             GrDrawRandomOp(&random, renderTargetContext.get(), std::move(paint));
             drawingManager->flush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                                  kNone_GrFlushFlags, 0, nullptr);
+                                  kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
         }
     }
 
diff --git a/tests/GrFinishedFlushTest.cpp b/tests/GrFinishedFlushTest.cpp
new file mode 100644
index 0000000..6b825b1
--- /dev/null
+++ b/tests/GrFinishedFlushTest.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Test.h"
+
+#include "GrContext.h"
+#include "GrContextPriv.h"
+#include "GrGpu.h"
+#include "SkSurface.h"
+
+using namespace sk_gpu_test;
+
+static void testing_finished_proc(void* ctx) {
+    int* count = (int*)ctx;  // the test passes the address of an int counter as the context
+    *count += 1;             // one increment per invocation of the proc
+}
+
+DEF_GPUTEST_FOR_RENDERING_CONTEXTS(FlushFinishedProcTest, reporter, ctxInfo) {
+    GrContext* ctx = ctxInfo.grContext();
+
+    SkImageInfo info =
+            SkImageInfo::Make(8, 8, kRGBA_8888_SkColorType, kPremul_SkAlphaType);
+    sk_sp<SkSurface> surface = SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info);
+    SkCanvas* canvas = surface->getCanvas();  // NOTE(review): surface is not null-checked
+
+    // Flush the surface and sync first so that any discards/clears recorded while making the
+    // surface are out of the way and do not count as pending work in the checks below.
+    surface->flush();
+    ctx->flush(kSyncCpu_GrFlushFlag, 0, nullptr);
+
+    int count = 0;  // bumped by testing_finished_proc each time it runs with &count
+
+    // Nothing is pending on the surface, so this flush should call the finished proc immediately.
+    surface->flush(SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+                   testing_finished_proc, (void*)&count);
+
+    REPORTER_ASSERT(reporter, count == 1);
+
+    canvas->clear(SK_ColorRED);  // record real work for the next flush
+
+    surface->flush(SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+                   testing_finished_proc, (void*)&count);
+
+    bool isVulkan = ctx->backend() == GrBackendApi::kVulkan;
+    if (isVulkan) {
+        // On Vulkan the command buffer we just submitted may or may not have finished already,
+        // so the finished proc may not have been called yet; accept either count.
+        REPORTER_ASSERT(reporter, count == 1 || count == 2);
+    } else {
+        REPORTER_ASSERT(reporter, count == 2);
+    }
+    ctx->flush(kSyncCpu_GrFlushFlag, 0, nullptr);
+    REPORTER_ASSERT(reporter, count == 2);  // after the sync the proc must have run
+
+    // Same check again, this time flushing through the GrContext instead of the surface.
+    canvas->clear(SK_ColorBLUE);
+    ctx->flush(kNone_GrFlushFlags, 0, nullptr, testing_finished_proc, (void*)&count);
+    if (isVulkan) {
+        // On Vulkan the command buffer we just submitted may or may not have finished already,
+        // so the finished proc may not have been called yet; accept either count.
+        REPORTER_ASSERT(reporter, count == 2 || count == 3);
+    } else {
+        REPORTER_ASSERT(reporter, count == 3);
+    }
+    ctx->flush(kSyncCpu_GrFlushFlag, 0, nullptr);
+    REPORTER_ASSERT(reporter, count == 3);  // after the sync the proc must have run
+
+    // No work was recorded since the sync above, so this flush should call the proc immediately.
+    ctx->flush(kNone_GrFlushFlags, 0, nullptr, testing_finished_proc, (void*)&count);
+    REPORTER_ASSERT(reporter, count == 4);
+
+    count = 0;  // restart counting for the two-proc ordering check below
+    int count2 = 0;
+    canvas->clear(SK_ColorGREEN);
+    surface->flush(SkSurface::BackendSurfaceAccess::kNoAccess, kNone_GrFlushFlags, 0, nullptr,
+                   testing_finished_proc, (void*)&count);
+    // Nothing new is flushed by this call, but its finished proc must not run until the surface
+    // flush above has also finished, so count and count2 must agree at every check below.
+    ctx->flush(kNone_GrFlushFlags, 0, nullptr, testing_finished_proc, (void*)&count2);
+
+    REPORTER_ASSERT(reporter, count == count2);
+
+    ctx->flush(kSyncCpu_GrFlushFlag, 0, nullptr);
+
+    REPORTER_ASSERT(reporter, count == 1);
+    REPORTER_ASSERT(reporter, count == count2);
+}
+
diff --git a/tests/OnFlushCallbackTest.cpp b/tests/OnFlushCallbackTest.cpp
index 3c97842..534739e 100644
--- a/tests/OnFlushCallbackTest.cpp
+++ b/tests/OnFlushCallbackTest.cpp
@@ -581,7 +581,7 @@
     }
 
     rtc->prepareForExternalIO(SkSurface::BackendSurfaceAccess::kNoAccess,
-                              kNone_GrFlushFlags, 0, nullptr);
+                              kNone_GrFlushFlags, 0, nullptr, nullptr, nullptr);
 
     SkBitmap readBack;
     readBack.allocN32Pixels(kFinalWidth, kFinalHeight);
diff --git a/tests/TransferPixelsTest.cpp b/tests/TransferPixelsTest.cpp
index bb0a936..c55e4ef 100644
--- a/tests/TransferPixelsTest.cpp
+++ b/tests/TransferPixelsTest.cpp
@@ -279,7 +279,7 @@
 
         // TODO(bsalomon): caps to know if the map() is synchronous and skip the flush if so.
         gpu->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                         kSyncCpu_GrFlushFlag, 0, nullptr);
+                         kSyncCpu_GrFlushFlag, 0, nullptr, nullptr, nullptr);
 
         const auto* map = reinterpret_cast<const GrColor*>(buffer->map());
         REPORTER_ASSERT(reporter, map);
@@ -308,7 +308,7 @@
 
         // TODO(bsalomon): caps to know if the map() is synchronous and skip the flush if so.
         gpu->finishFlush(nullptr, SkSurface::BackendSurfaceAccess::kNoAccess,
-                         kSyncCpu_GrFlushFlag, 0, nullptr);
+                         kSyncCpu_GrFlushFlag, 0, nullptr, nullptr, nullptr);
 
         map = reinterpret_cast<const GrColor*>(buffer->map());
         REPORTER_ASSERT(reporter, map);