Add support for Semaphores (gpu waiting on gpu) in Ganesh

BUG=skia:

Change-Id: I4324b65bc50a3dfd90372459899870d5f1952fdc
Reviewed-on: https://skia-review.googlesource.com/9120
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index b1d20e2..64c1c88 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -398,9 +398,12 @@
     this->invalidateState();
 }
 
-void GrVkPrimaryCommandBuffer::submitToQueue(const GrVkGpu* gpu,
-                                             VkQueue queue,
-                                             GrVkGpu::SyncQueue sync) {
+void GrVkPrimaryCommandBuffer::submitToQueue(
+        const GrVkGpu* gpu,
+        VkQueue queue,
+        GrVkGpu::SyncQueue sync,
+        const GrVkSemaphore::Resource* signalSemaphore,
+        SkTArray<const GrVkSemaphore::Resource*>& waitSemaphores) {
     SkASSERT(!fIsActive);
 
     VkResult err;
@@ -415,17 +418,36 @@
         GR_VK_CALL(gpu->vkInterface(), ResetFences(gpu->device(), 1, &fSubmitFence));
     }
 
+    if (signalSemaphore) {
+        this->addResource(signalSemaphore);
+    }
+
+    int waitCount = waitSemaphores.count();
+    SkTArray<VkSemaphore> vkWaitSems(waitCount);
+    SkTArray<VkPipelineStageFlags> vkWaitStages(waitCount);
+    if (waitCount) {
+        for (int i = 0; i < waitCount; ++i) {
+            this->addResource(waitSemaphores[i]);
+            vkWaitSems.push_back(waitSemaphores[i]->semaphore());
+            vkWaitStages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+        }
+    }
+    SkTArray<VkSemaphore> vkSignalSem;
+    if (signalSemaphore) {
+        vkSignalSem.push_back(signalSemaphore->semaphore());
+    }
+
     VkSubmitInfo submitInfo;
     memset(&submitInfo, 0, sizeof(VkSubmitInfo));
     submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submitInfo.pNext = nullptr;
-    submitInfo.waitSemaphoreCount = 0;
-    submitInfo.pWaitSemaphores = nullptr;
-    submitInfo.pWaitDstStageMask = 0;
+    submitInfo.waitSemaphoreCount = waitCount;
+    submitInfo.pWaitSemaphores = vkWaitSems.begin();
+    submitInfo.pWaitDstStageMask = vkWaitStages.begin();
     submitInfo.commandBufferCount = 1;
     submitInfo.pCommandBuffers = &fCmdBuffer;
-    submitInfo.signalSemaphoreCount = 0;
-    submitInfo.pSignalSemaphores = nullptr;
+    submitInfo.signalSemaphoreCount = vkSignalSem.count();
+    submitInfo.pSignalSemaphores = vkSignalSem.begin();
     GR_VK_CALL_ERRCHECK(gpu->vkInterface(), QueueSubmit(queue, 1, &submitInfo, fSubmitFence));
 
     if (GrVkGpu::kForce_SyncQueue == sync) {
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index e28df44..e156861 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -10,6 +10,7 @@
 
 #include "GrVkGpu.h"
 #include "GrVkResource.h"
+#include "GrVkSemaphore.h"
 #include "GrVkUtil.h"
 #include "vk/GrVkDefines.h"
 
@@ -298,7 +299,9 @@
                       uint32_t regionCount,
                       const VkImageResolve* regions);
 
-    void submitToQueue(const GrVkGpu* gpu, VkQueue queue, GrVkGpu::SyncQueue sync);
+    void submitToQueue(const GrVkGpu* gpu, VkQueue queue, GrVkGpu::SyncQueue sync,
+                       const GrVkSemaphore::Resource* signalSemaphore,
+                       SkTArray<const GrVkSemaphore::Resource*>& waitSemaphores);
     bool finished(const GrVkGpu* gpu) const;
 
 #ifdef SK_TRACE_VK_RESOURCES
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 8569c93..b85aa2d 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -25,6 +25,7 @@
 #include "GrVkPipelineState.h"
 #include "GrVkRenderPass.h"
 #include "GrVkResourceProvider.h"
+#include "GrVkSemaphore.h"
 #include "GrVkTexture.h"
 #include "GrVkTextureRenderTarget.h"
 #include "GrVkTransferBuffer.h"
@@ -181,6 +182,11 @@
     SkASSERT(VK_SUCCESS == res || VK_ERROR_DEVICE_LOST == res);
 #endif
 
+    for (int i = 0; i < fSemaphoresToWaitOn.count(); ++i) {
+        fSemaphoresToWaitOn[i]->unref(this);
+    }
+    fSemaphoresToWaitOn.reset();
+
     fCopyManager.destroyResources(this);
 
     // must call this just before we destroy the command pool and VkDevice
@@ -206,11 +212,18 @@
     return new GrVkGpuCommandBuffer(this, colorInfo, stencilInfo);
 }
 
-void GrVkGpu::submitCommandBuffer(SyncQueue sync) {
+void GrVkGpu::submitCommandBuffer(SyncQueue sync,
+                                  const GrVkSemaphore::Resource* signalSemaphore) {
     SkASSERT(fCurrentCmdBuffer);
     fCurrentCmdBuffer->end(this);
 
-    fCurrentCmdBuffer->submitToQueue(this, fQueue, sync);
+    fCurrentCmdBuffer->submitToQueue(this, fQueue, sync, signalSemaphore, fSemaphoresToWaitOn);
+
+    for (int i = 0; i < fSemaphoresToWaitOn.count(); ++i) {
+        fSemaphoresToWaitOn[i]->unref(this);
+    }
+    fSemaphoresToWaitOn.reset();
+
     fResourceProvider.checkCommandBuffers();
 
     // Release old command buffer and create a new one
@@ -1832,32 +1845,48 @@
     this->didWriteToSurface(target, &bounds);
 }
 
-GrFence SK_WARN_UNUSED_RESULT GrVkGpu::insertFence() const {
+GrFence SK_WARN_UNUSED_RESULT GrVkGpu::insertFence() {
     VkFenceCreateInfo createInfo;
     memset(&createInfo, 0, sizeof(VkFenceCreateInfo));
     createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
     createInfo.pNext = nullptr;
     createInfo.flags = 0;
     VkFence fence = VK_NULL_HANDLE;
-    VkResult result = GR_VK_CALL(this->vkInterface(), CreateFence(this->device(), &createInfo,
-                                                                  nullptr, &fence));
-    // TODO: verify that all QueueSubmits before this will finish before this fence signals
-    if (VK_SUCCESS == result) {
-        GR_VK_CALL(this->vkInterface(), QueueSubmit(this->queue(), 0, nullptr, fence));
-    }
+
+    VK_CALL_ERRCHECK(CreateFence(this->device(), &createInfo, nullptr, &fence));
+    VK_CALL(QueueSubmit(this->queue(), 0, nullptr, fence));
+
+    GR_STATIC_ASSERT(sizeof(GrFence) >= sizeof(VkFence));
     return (GrFence)fence;
 }
 
-bool GrVkGpu::waitFence(GrFence fence, uint64_t timeout) const {
-    VkResult result = GR_VK_CALL(this->vkInterface(), WaitForFences(this->device(), 1,
-                                                                    (VkFence*)&fence,
-                                                                    VK_TRUE,
-                                                                    timeout));
+bool GrVkGpu::waitFence(GrFence fence, uint64_t timeout) {
+    SkASSERT(VK_NULL_HANDLE != (VkFence)fence);
+
+    VkResult result = VK_CALL(WaitForFences(this->device(), 1, (VkFence*)&fence, VK_TRUE, timeout));
     return (VK_SUCCESS == result);
 }
 
 void GrVkGpu::deleteFence(GrFence fence) const {
-    GR_VK_CALL(this->vkInterface(), DestroyFence(this->device(), (VkFence)fence, nullptr));
+    VK_CALL(DestroyFence(this->device(), (VkFence)fence, nullptr));
+}
+
+sk_sp<GrSemaphore> SK_WARN_UNUSED_RESULT GrVkGpu::makeSemaphore() {
+    return GrVkSemaphore::Make(this);  // Make() creates the VkSemaphore on this gpu's device.
+}
+
+void GrVkGpu::insertSemaphore(sk_sp<GrSemaphore> semaphore) {
+    GrVkSemaphore* vkSem = static_cast<GrVkSemaphore*>(semaphore.get());
+    // Submit the current command buffer; this semaphore is passed as the one to signal.
+    this->submitCommandBuffer(kSkip_SyncQueue, vkSem->getResource());
+}
+
+void GrVkGpu::waitSemaphore(sk_sp<GrSemaphore> semaphore) {
+    GrVkSemaphore* vkSem = static_cast<GrVkSemaphore*>(semaphore.get());
+    // Queue the resource for the next submit; the ref taken here is dropped after submitting.
+    const GrVkSemaphore::Resource* resource = vkSem->getResource();
+    resource->ref();
+    fSemaphoresToWaitOn.push_back(resource);
 }
 
 void GrVkGpu::flush() {
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index c935945..7203bf1 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -16,6 +16,7 @@
 #include "GrVkIndexBuffer.h"
 #include "GrVkMemory.h"
 #include "GrVkResourceProvider.h"
+#include "GrVkSemaphore.h"
 #include "GrVkVertexBuffer.h"
 #include "GrVkUtil.h"
 #include "vk/GrVkDefines.h"
@@ -129,10 +130,14 @@
 
     void finishOpList() override;
 
-    GrFence SK_WARN_UNUSED_RESULT insertFence() const override;
-    bool waitFence(GrFence, uint64_t timeout) const override;
+    GrFence SK_WARN_UNUSED_RESULT insertFence() override;
+    bool waitFence(GrFence, uint64_t timeout) override;
     void deleteFence(GrFence) const override;
 
+    sk_sp<GrSemaphore> SK_WARN_UNUSED_RESULT makeSemaphore() override;
+    void insertSemaphore(sk_sp<GrSemaphore> semaphore) override;
+    void waitSemaphore(sk_sp<GrSemaphore> semaphore) override;
+
     void flush() override;
 
     void generateMipmap(GrVkTexture* tex);
@@ -202,8 +207,12 @@
 
     // Ends and submits the current command buffer to the queue and then creates a new command
     // buffer and begins it. If sync is set to kForce_SyncQueue, the function will wait for all
-    // work in the queue to finish before returning.
-    void submitCommandBuffer(SyncQueue sync);
+    // work in the queue to finish before returning. If signalSemaphore is not null,
+    // we will signal the semaphore at the end of this command buffer. If this GrVkGpu object has
+    // any semaphores in fSemaphoresToWaitOn, we will add those wait semaphores to this command
+    // buffer when submitting.
+    void submitCommandBuffer(SyncQueue sync,
+                             const GrVkSemaphore::Resource* signalSemaphore = nullptr);
 
     void internalResolveRenderTarget(GrRenderTarget* target, bool requiresSubmit);
 
@@ -247,18 +256,22 @@
 
     // These Vulkan objects are provided by the client, and also stored in fBackendContext.
     // They're copied here for convenient access.
-    VkDevice                               fDevice;
-    VkQueue                                fQueue;    // Must be Graphics queue
+    VkDevice                                     fDevice;
+    VkQueue                                      fQueue;    // Must be Graphics queue
 
     // Created by GrVkGpu
-    GrVkResourceProvider                   fResourceProvider;
-    VkCommandPool                          fCmdPool;
-    GrVkPrimaryCommandBuffer*              fCurrentCmdBuffer;
-    VkPhysicalDeviceMemoryProperties       fPhysDevMemProps;
+    GrVkResourceProvider                         fResourceProvider;
+    VkCommandPool                                fCmdPool;
 
-    std::unique_ptr<GrVkHeap>              fHeaps[kHeapCount];
+    GrVkPrimaryCommandBuffer*                    fCurrentCmdBuffer;
 
-    GrVkCopyManager                        fCopyManager;
+    SkSTArray<1, const GrVkSemaphore::Resource*> fSemaphoresToWaitOn;
+
+    VkPhysicalDeviceMemoryProperties             fPhysDevMemProps;
+
+    std::unique_ptr<GrVkHeap>                    fHeaps[kHeapCount];
+
+    GrVkCopyManager                              fCopyManager;
 
 #ifdef SK_ENABLE_VK_LAYERS
     // For reporting validation layer errors
diff --git a/src/gpu/vk/GrVkSemaphore.cpp b/src/gpu/vk/GrVkSemaphore.cpp
new file mode 100644
index 0000000..d84635f
--- /dev/null
+++ b/src/gpu/vk/GrVkSemaphore.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrVkSemaphore.h"
+
+#include "GrVkGpu.h"
+#include "GrVkUtil.h"
+
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+// windows wants to define this as CreateSemaphoreA or CreateSemaphoreW
+#undef CreateSemaphore
+#endif
+
+sk_sp<GrVkSemaphore> GrVkSemaphore::Make(const GrVkGpu* gpu) {
+    VkSemaphoreCreateInfo createInfo;
+    memset(&createInfo, 0, sizeof(VkSemaphoreCreateInfo));  // zero the whole create-info struct
+    createInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    createInfo.pNext = nullptr;
+    createInfo.flags = 0;
+    VkSemaphore semaphore = VK_NULL_HANDLE;
+    GR_VK_CALL_ERRCHECK(gpu->vkInterface(),
+                        CreateSemaphore(gpu->device(), &createInfo, nullptr, &semaphore));
+    // Wrap the new handle; the GrVkSemaphore constructor stores it in a Resource.
+    return sk_sp<GrVkSemaphore>(new GrVkSemaphore(gpu, semaphore));
+}
+
+GrVkSemaphore::GrVkSemaphore(const GrVkGpu* gpu, VkSemaphore semaphore) : INHERITED(gpu) {
+    fResource = new Resource(semaphore);  // unref'd (or abandoned) in ~GrVkSemaphore
+}
+
+GrVkSemaphore::~GrVkSemaphore() {
+    if (fGpu) {
+        fResource->unref(static_cast<const GrVkGpu*>(fGpu));  // gpu still set: unref with it
+    } else {
+        fResource->unrefAndAbandon();  // no gpu pointer, so drop the ref without one
+    }
+}
+
+void GrVkSemaphore::Resource::freeGPUData(const GrVkGpu* gpu) const {
+    GR_VK_CALL(gpu->vkInterface(),  // destroys the wrapped VkSemaphore on gpu's device
+               DestroySemaphore(gpu->device(), fSemaphore, nullptr));
+}
+
diff --git a/src/gpu/vk/GrVkSemaphore.h b/src/gpu/vk/GrVkSemaphore.h
new file mode 100644
index 0000000..0a3bc17
--- /dev/null
+++ b/src/gpu/vk/GrVkSemaphore.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrVkSemaphore_DEFINED
+#define GrVkSemaphore_DEFINED
+
+#include "GrSemaphore.h"
+#include "GrVkResource.h"
+
+#include "vk/GrVkTypes.h"
+
+class GrVkGpu;
+
+class GrVkSemaphore : public GrSemaphore {
+public:
+    static sk_sp<GrVkSemaphore> Make(const GrVkGpu* gpu);  // creates and wraps a new VkSemaphore
+
+    ~GrVkSemaphore() override;
+
+    class Resource : public GrVkResource {  // GrVkResource holding the VkSemaphore handle
+    public:
+        Resource(VkSemaphore semaphore) : INHERITED(), fSemaphore(semaphore) {}
+
+        ~Resource() override {}
+
+        VkSemaphore semaphore() const { return fSemaphore; }
+
+#ifdef SK_TRACE_VK_RESOURCES
+        void dumpInfo() const override {  // NOTE(review): %d may not match the handle's width
+            SkDebugf("GrVkSemaphore: %d (%d refs)\n", fSemaphore, this->getRefCnt());
+        }
+#endif
+    private:
+        void freeGPUData(const GrVkGpu* gpu) const override;  // calls DestroySemaphore
+
+        VkSemaphore fSemaphore;
+
+        typedef GrVkResource INHERITED;
+    };
+
+    const Resource* getResource() const { return fResource; }  // returns fResource; adds no ref
+
+private:
+    GrVkSemaphore(const GrVkGpu* gpu, VkSemaphore semaphore);
+
+    const Resource* fResource;  // allocated in the constructor, unref'd in the destructor
+
+    typedef GrSemaphore INHERITED;
+};
+
+#endif