Vulkan: Throttle CPU to avoid excessive frame queuing

Unthrottled, the CPU can generate and queue an increasingly large number
of frames with the GPU lagging behind, especially with vsync enabled.

Assuming N swapchain images, this commit adds a wait on the fence of the
Nth previous frame before submitting new work; that is, the CPU is
always at most N frames ahead of the GPU.

Bug: angleproject:2908
Change-Id: Ieb2bf20168bfe9bc9d8e2219f682b01347c21dec
Reviewed-on: https://chromium-review.googlesource.com/c/1296953
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index 95e4edb..e02cd25 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -28,13 +28,15 @@
 #include "libANGLE/renderer/vulkan/vk_format_utils.h"
 #include "platform/Platform.h"
 
+#include "third_party/trace_event/trace_event.h"
+
 // Consts
 namespace
 {
 const uint32_t kMockVendorID     = 0xba5eba11;
 const uint32_t kMockDeviceID     = 0xf005ba11;
 constexpr char kMockDeviceName[] = "Vulkan Mock Device";
-constexpr size_t kInFlightCommandsLimit = 50000u;
+constexpr size_t kInFlightCommandsLimit = 100u;
 }  // anonymous namespace
 
 namespace rx
@@ -838,6 +840,8 @@
 {
     if (!mCommandGraph.empty())
     {
+        TRACE_EVENT0("gpu.angle", "RendererVk::finish");
+
         vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
         ANGLE_TRY(flushCommandGraph(context, &commandBatch.get()));
 
@@ -881,6 +885,8 @@
         garbage.destroy(mDevice);
     }
     mGarbage.clear();
+
+    mLastCompletedQueueSerial = mLastSubmittedQueueSerial;
 }
 
 angle::Result RendererVk::checkCompletedCommands(vk::Context *context)
@@ -940,15 +946,14 @@
 
     mInFlightCommands.emplace_back(scopedBatch.release());
 
-    // Check that mInFlightCommands isn't growing too fast
-    // If it is, wait for the queue to complete work it has alread been assigned
-    if (mInFlightCommands.size() > kInFlightCommandsLimit)
-    {
-        vkQueueWaitIdle(mQueue);
-    }
+    // CPU should be throttled to prevent mInFlightCommands from growing too fast.  That is done on
+    // swap() though, and there could be multiple submissions in between (through glFlush() calls),
+    // so the limit is larger than the expected number of images.
+    ASSERT(mInFlightCommands.size() <= kInFlightCommandsLimit);
 
     // Increment the queue serial. If this fails, we should restart ANGLE.
     // TODO(jmadill): Overflow check.
+    mLastSubmittedQueueSerial = mCurrentQueueSerial;
     mCurrentQueueSerial = mQueueSerialFactory.generate();
 
     ANGLE_TRY(checkCompletedCommands(context));
@@ -1042,6 +1047,8 @@
         return angle::Result::Continue();
     }
 
+    TRACE_EVENT0("gpu.angle", "RendererVk::flush");
+
     vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
     ANGLE_TRY(flushCommandGraph(context, &commandBatch.get()));