Vulkan: Implement GLsync and EGLSync fence syncs

Fence syncs are required by GLES 3 (as GLsync) and by EGL_KHR_fence_sync
and EGL_KHR_wait_sync, or EGL 1.5 (as EGLSync).

The two constructs (GLsync and EGLSync) have similar semantics and share
the implementation on the Vulkan backend.

The implementation of a fence sync object is achieved through the
combined use of a VkEvent and the implicit VkFence inserted at the end
of every submission.  Imagine the following command buffer:

    glDraw      : Draw
    glFenceSync : Set Event  <-- insertion of fence sync
    glDraw      : Draw
                : Signal Fence <-- implicit fence at the end of submission
    glFlush     : Submit

Assume the serial S is associated with this submission.  The following
hold:

- If the event is set, the fence sync is signaled
- If S is already finished, the fence sync is signaled
- If the client is waiting on the sync and S is not yet flushed, there
  will be a deadlock (unless multi-threaded and another thread performs
  the flush).

The event is used to implement server waits (glWaitSync), as VkEvent is
the only entity the GPU can both signal and wait on within a command
buffer.  The wait is inserted in the command graph without incurring a
flush, i.e. the wait can be in the same command buffer as the event set.

The event, however, does not support CPU waits (glClientWaitSync).
VkFence is the only entity the CPU can wait on.  For client waits,
therefore, the following algorithm is used:

- If the event is already set, there's no wait  ->  already signaled
- If timeout is zero, there's no wait  ->  timeout expired
- If S is not flushed, flush it to ensure forward progress.
- Wait until S is finished  ->  condition satisfied / timeout expired.

Bug: angleproject:2466
Change-Id: I678995a6139dd9533fa8ad361a3d292b202c52a4
Reviewed-on: https://chromium-review.googlesource.com/c/1422552
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/CommandGraph.cpp b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
index adce3e3..2aef407 100644
--- a/src/libANGLE/renderer/vulkan/CommandGraph.cpp
+++ b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
@@ -74,6 +74,17 @@
                     UNREACHABLE();
                     return "Query";
             }
+        case CommandGraphResourceType::FenceSync:
+            switch (function)
+            {
+                case CommandGraphNodeFunction::SetFenceSync:
+                    return "SetFenceSync";
+                case CommandGraphNodeFunction::WaitFenceSync:
+                    return "WaitFenceSync";
+                default:
+                    UNREACHABLE();
+                    return "FenceSync";
+            }
         default:
             UNREACHABLE();
             return "";
@@ -211,6 +222,7 @@
       mFunction(function),
       mQueryPool(VK_NULL_HANDLE),
       mQueryIndex(0),
+      mFenceSyncEvent(VK_NULL_HANDLE),
       mHasChildren(false),
       mVisitedState(VisitedState::Unvisited),
       mGlobalMemoryBarrierSrcAccess(0),
@@ -337,6 +349,13 @@
     mQueryIndex = queryIndex;
 }
 
+void CommandGraphNode::setFenceSync(const vk::Event &event)
+{
+    ASSERT(mFunction == CommandGraphNodeFunction::SetFenceSync ||
+           mFunction == CommandGraphNodeFunction::WaitFenceSync);
+    mFenceSyncEvent = event.getHandle();
+}
+
 // Do not call this in anything but testing code, since it's slow.
 bool CommandGraphNode::isChildOf(CommandGraphNode *parent)
 {
@@ -381,7 +400,7 @@
     switch (mFunction)
     {
         case CommandGraphNodeFunction::Generic:
-            ASSERT(mQueryPool == VK_NULL_HANDLE);
+            ASSERT(mQueryPool == VK_NULL_HANDLE && mFenceSyncEvent == VK_NULL_HANDLE);
 
             // Record the deferred pipeline barrier if necessary.
             ASSERT((mGlobalMemoryBarrierDstAccess == 0) == (mGlobalMemoryBarrierSrcAccess == 0));
@@ -461,6 +480,25 @@
 
             break;
 
+        case CommandGraphNodeFunction::SetFenceSync:
+            ASSERT(!mOutsideRenderPassCommands.valid() && !mInsideRenderPassCommands.valid());
+            ASSERT(mFenceSyncEvent != VK_NULL_HANDLE);
+
+            primaryCommandBuffer->setEvent(mFenceSyncEvent, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
+            break;
+
+        case CommandGraphNodeFunction::WaitFenceSync:
+            ASSERT(!mOutsideRenderPassCommands.valid() && !mInsideRenderPassCommands.valid());
+            ASSERT(mFenceSyncEvent != VK_NULL_HANDLE);
+
+            // Fence Syncs are purely execution barriers, so there are no memory barriers attached.
+            primaryCommandBuffer->waitEvents(
+                1, &mFenceSyncEvent, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
+
+            break;
+
         default:
             UNREACHABLE();
     }
@@ -660,6 +698,20 @@
     newNode->setQueryPool(queryPool, queryIndex);
 }
 
+void CommandGraph::setFenceSync(const vk::Event &event)
+{
+    CommandGraphNode *newNode = allocateBarrierNode(CommandGraphResourceType::FenceSync,
+                                                    CommandGraphNodeFunction::SetFenceSync);
+    newNode->setFenceSync(event);
+}
+
+void CommandGraph::waitFenceSync(const vk::Event &event)
+{
+    CommandGraphNode *newNode = allocateBarrierNode(CommandGraphResourceType::FenceSync,
+                                                    CommandGraphNodeFunction::WaitFenceSync);
+    newNode->setFenceSync(event);
+}
+
 // Dumps the command graph into a dot file that works with graphviz.
 void CommandGraph::dumpGraphDotFile(std::ostream &out) const
 {