Vulkan: Remaining disjoint timer query functions

The following features were missing in c2b576d9e:

- glGetIntegerv with GL_GPU_DISJOINT_EXT: this is currently impossible
  to query in Vulkan, so 0 is always returned.
- glGetIntegerv with GL_TIMESTAMP_EXT: this is a way to query GPU
  timestamp without performing flushes or waiting for the GPU to finish.
  There is no direct correspondence in Vulkan; it's implemented by
  making a small submission, with no dependency to other submissions, in
  which there is only a timestamp query.

Bug: angleproject:2885
Change-Id: I2341bd610db9084c26b6421c6f8949950ffa4de8
Reviewed-on: https://chromium-review.googlesource.com/c/1299873
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index 1f59031..e1c8809 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -895,15 +895,18 @@
 
     if (mGpuEventsEnabled)
     {
-        // Recalculate the CPU/GPU time difference to account for clock drifting.  Note that
-        // currently, the perftest event handler does not correctly handle out of order gpu and sync
-        // events, so make sure all gpu events are completed.  This loop should in practice execute
-        // once since the queue is already idle.
+        // This loop should in practice execute once since the queue is already idle.
         while (mInFlightGpuEventQueries.size() > 0)
         {
             ANGLE_TRY(checkCompletedGpuEvents(context));
         }
-        ANGLE_TRY(synchronizeCpuGpuTime(context));
+        // Recalculate the CPU/GPU time difference to account for clock drifting.  Avoid unnecessary
+        // synchronization if there is no event to be adjusted (happens when finish() gets called
+        // multiple times towards the end of the application).
+        if (mGpuEvents.size() > 0)
+        {
+            ANGLE_TRY(synchronizeCpuGpuTime(context));
+        }
     }
 
     return angle::Result::Continue();
@@ -1007,9 +1010,9 @@
     // Reallocate the command pool for next frame.
     // TODO(jmadill): Consider reusing command pools.
     VkCommandPoolCreateInfo poolInfo = {};
-    poolInfo.sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     poolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
-    poolInfo.queueFamilyIndex = mCurrentQueueFamilyIndex;
+    poolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;
 
     return mCommandPool.init(context, poolInfo);
 }
@@ -1236,6 +1239,106 @@
     return &mShaderLibrary;
 }
 
+angle::Result RendererVk::getTimestamp(vk::Context *context, uint64_t *timestampOut)
+{
+    // The intent of this function is to query the timestamp without stalling the GPU.  Currently,
+    // that seems impossible, so instead, we are going to make a small submission with just a
+    // timestamp query.  First, the disjoint timer query extension says:
+    //
+    // > This will return the GL time after all previous commands have reached the GL server but
+    // > have not yet necessarily executed.
+    //
+    // The previous commands are stored in the command graph at the moment and are not yet flushed.
+    // The wording allows us to make a submission to get the timestamp without performing a flush.
+    //
+    // Second:
+    //
+    // > By using a combination of this synchronous get command and the asynchronous timestamp
+    // > query object target, applications can measure the latency between when commands reach the
+    // > GL server and when they are realized in the framebuffer.
+    //
+    // This fits with the above strategy as well, although inevitably we are possibly introducing a
+    // GPU bubble.  This function directly generates a command buffer and submits it instead of
+    // using the other member functions.  This is to avoid changing any state, such as the queue
+    // serial.
+
+    // Create a scoped query pool and allocate from it the query that receives the GPU timestamp.
+    vk::Scoped<vk::DynamicQueryPool> timestampQueryPool(mDevice);
+    vk::QueryHelper timestampQuery;
+    ANGLE_TRY(timestampQueryPool.get().init(context, VK_QUERY_TYPE_TIMESTAMP, 1));
+    ANGLE_TRY(timestampQueryPool.get().allocateQuery(context, &timestampQuery));
+
+    // Record a primary command buffer that only resets the query and writes the timestamp.
+    vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
+    vk::CommandBuffer &commandBuffer = commandBatch.get();
+
+    VkCommandBufferAllocateInfo commandBufferInfo = {};
+    commandBufferInfo.sType                       = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+    commandBufferInfo.commandPool                 = mCommandPool.getHandle();
+    commandBufferInfo.level                       = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    commandBufferInfo.commandBufferCount          = 1;
+
+    ANGLE_TRY(commandBuffer.init(context, commandBufferInfo));
+
+    VkCommandBufferBeginInfo beginInfo = {};
+    beginInfo.sType                    = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+    beginInfo.flags                    = 0;
+    beginInfo.pInheritanceInfo         = nullptr;
+
+    ANGLE_TRY(commandBuffer.begin(context, beginInfo));
+
+    commandBuffer.resetQueryPool(timestampQuery.getQueryPool()->getHandle(),
+                                 timestampQuery.getQuery(), 1);
+    commandBuffer.writeTimestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                 timestampQuery.getQueryPool()->getHandle(),
+                                 timestampQuery.getQuery());
+
+    ANGLE_TRY(commandBuffer.end(context));
+
+    // Create an unsignaled fence so the CPU can wait on this specific submission.
+    VkFenceCreateInfo fenceInfo = {};
+    fenceInfo.sType             = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    fenceInfo.flags             = 0;
+
+    vk::Scoped<vk::Fence> fence(mDevice);
+    ANGLE_TRY(fence.get().init(context, fenceInfo));
+
+    // Submit the command buffer directly to the queue, with no wait or signal semaphores.
+    VkSubmitInfo submitInfo         = {};
+    submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submitInfo.waitSemaphoreCount   = 0;
+    submitInfo.pWaitSemaphores      = nullptr;
+    submitInfo.pWaitDstStageMask    = nullptr;
+    submitInfo.commandBufferCount   = 1;
+    submitInfo.pCommandBuffers      = commandBuffer.ptr();
+    submitInfo.signalSemaphoreCount = 0;
+    submitInfo.pSignalSemaphores    = nullptr;
+
+    ANGLE_VK_TRY(context, vkQueueSubmit(mQueue, 1, &submitInfo, fence.get().getHandle()));
+
+    // Wait for the submission to finish.  Given no semaphores, there is hope that it would execute
+    // in parallel with what's already running on the GPU.
+    constexpr uint64_t kMaxFenceWaitTimeNs = 10'000'000'000llu;
+    angle::Result result                   = fence.get().wait(context, kMaxFenceWaitTimeNs);
+    if (result == angle::Result::Incomplete())
+    {
+        // Declare it a failure if it times out.
+        result = angle::Result::Stop();
+    }
+    ANGLE_TRY(result);
+
+    // Read the 64-bit query result back into *timestampOut.
+    constexpr VkQueryResultFlags queryFlags = VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT;
+
+    ANGLE_TRY(timestampQuery.getQueryPool()->getResults(context, timestampQuery.getQuery(), 1,
+                                                        sizeof(*timestampOut), timestampOut,
+                                                        sizeof(*timestampOut), queryFlags));
+
+    timestampQueryPool.get().freeQuery(context, &timestampQuery);
+
+    return angle::Result::Continue();
+}
+
 angle::Result RendererVk::synchronizeCpuGpuTime(vk::Context *context)
 {
     ASSERT(mGpuEventsEnabled);