Vulkan: add GPU trace events RendererVk now tries, as best as it can, to match the CPU and GPU timers on init as well as every finish(). A clock-sync event is generated for each such synchronization point. RendererVk::traceGpuEvent() is a new function that, given a command buffer, performs timestamp queries corresponding to GPU events. These queries are read back when done, without incurring GPU bubbles, at which point an event is generated with that timestamp. Bug: angleproject:2908 Change-Id: I08d7d11ff9f8ad6c9f9a9899767c9cd746d0623e Reviewed-on: https://chromium-review.googlesource.com/c/1296954 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>

commit: 25224e786ffea77e8993daa8e8b9b236428cd6eb [log] [tgz]
author: Shahbaz Youssefi <syoussefi@google.com> Mon Oct 22 11:56:02 2018 -0400
committer: Commit Bot <commit-bot@chromium.org> Mon Oct 29 15:28:41 2018 +0000
tree: a1cb9d13ba33bf8dcb46ebab7f97b9e42835b66f
parent: 4a22f4b04619fd92138f4b37f861eebc2b0bfc6b [diff] [blame]
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index e02cd25..1f59031 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp

@@ -306,7 +306,10 @@
       mCurrentQueueSerial(mQueueSerialFactory.generate()),
       mDeviceLost(false),
       mPipelineCacheVkUpdateTimeout(kPipelineCacheVkUpdatePeriod),
-      mCommandGraph(kEnableCommandGraphDiagnostics)
+      mCommandGraph(kEnableCommandGraphDiagnostics),
+      mGpuEventsEnabled(false),
+      mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
+      mGpuEventTimestampOrigin(0)
 {
 }
 
@@ -330,6 +333,7 @@
     mPipelineCacheVk.destroy(mDevice);
     mSubmitSemaphorePool.destroy(mDevice);
     mShaderLibrary.destroy(mDevice);
+    mGpuEventQueryPool.destroy(mDevice);
 
     GlslangWrapper::Release();
 
@@ -624,6 +628,25 @@
     // Initialize the submission semaphore pool.
     ANGLE_TRY(mSubmitSemaphorePool.init(displayVk, vk::kDefaultSemaphorePoolSize));
 
+#if ANGLE_ENABLE_VULKAN_GPU_TRACE_EVENTS
+    angle::PlatformMethods *platform = ANGLEPlatformCurrent();
+    ASSERT(platform);
+
+    // GPU tracing workaround for anglebug.com/2927.  The renderer should not emit gpu events during
+    // platform discovery.
+    const unsigned char *gpuEventsEnabled =
+        platform->getTraceCategoryEnabledFlag(platform, "gpu.angle.gpu");
+    mGpuEventsEnabled = gpuEventsEnabled && *gpuEventsEnabled;
+#endif
+
+    if (mGpuEventsEnabled)
+    {
+        // Calculate the difference between CPU and GPU clocks for GPU event reporting.
+        ANGLE_TRY(mGpuEventQueryPool.init(displayVk, VK_QUERY_TYPE_TIMESTAMP,
+                                          vk::kDefaultTimestampQueryPoolSize));
+        ANGLE_TRY(synchronizeCpuGpuTime(displayVk));
+    }
+
     return angle::Result::Continue();
 }
 
@@ -781,11 +804,13 @@
 
 void RendererVk::getSubmitWaitSemaphores(
     vk::Context *context,
-    angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> *waitSemaphores)
+    angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> *waitSemaphores,
+    angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> *waitStageMasks)
 {
     if (mSubmitLastSignaledSemaphore.getSemaphore())
     {
         waitSemaphores->push_back(mSubmitLastSignaledSemaphore.getSemaphore()->getHandle());
+        waitStageMasks->push_back(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
 
         // Return the semaphore to the pool (which will remain valid and unused until the
         // queue it's about to be waited on has finished execution).
@@ -795,6 +820,8 @@
     for (vk::SemaphoreHelper &semaphore : mSubmitWaitSemaphores)
     {
         waitSemaphores->push_back(semaphore.getSemaphore()->getHandle());
+        waitStageMasks->push_back(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
         mSubmitSemaphorePool.freeSemaphore(context, &semaphore);
     }
     mSubmitWaitSemaphores.clear();
@@ -845,18 +872,15 @@
         vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
         ANGLE_TRY(flushCommandGraph(context, &commandBatch.get()));
 
-        VkPipelineStageFlags waitStageMask[2] = {
-            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
-        };
-
         angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores;
-        getSubmitWaitSemaphores(context, &waitSemaphores);
+        angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks;
+        getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks);
 
         VkSubmitInfo submitInfo         = {};
         submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
         submitInfo.waitSemaphoreCount   = static_cast<uint32_t>(waitSemaphores.size());
         submitInfo.pWaitSemaphores      = waitSemaphores.data();
-        submitInfo.pWaitDstStageMask    = waitStageMask;
+        submitInfo.pWaitDstStageMask    = waitStageMasks.data();
         submitInfo.commandBufferCount   = 1;
         submitInfo.pCommandBuffers      = commandBatch.get().ptr();
         submitInfo.signalSemaphoreCount = 0;
@@ -868,6 +892,20 @@
     ASSERT(mQueue != VK_NULL_HANDLE);
     ANGLE_VK_TRY(context, vkQueueWaitIdle(mQueue));
     freeAllInFlightResources();
+
+    if (mGpuEventsEnabled)
+    {
+        // Recalculate the CPU/GPU time difference to account for clock drifting.  Note that
+        // currently, the perftest event handler does not correctly handle out of order gpu and sync
+        // events, so make sure all gpu events are completed.  This loop should in practice execute
+        // once since the queue is already idle.
+        while (mInFlightGpuEventQueries.size() > 0)
+        {
+            ANGLE_TRY(checkCompletedGpuEvents(context));
+        }
+        ANGLE_TRY(synchronizeCpuGpuTime(context));
+    }
+
     return angle::Result::Continue();
 }
 
@@ -958,6 +996,11 @@
 
     ANGLE_TRY(checkCompletedCommands(context));
 
+    if (mGpuEventsEnabled)
+    {
+        ANGLE_TRY(checkCompletedGpuEvents(context));
+    }
+
     // Simply null out the command buffer here - it was allocated using the command pool.
     commandBuffer.releaseHandle();
 
@@ -965,12 +1008,10 @@
     // TODO(jmadill): Consider reusing command pools.
     VkCommandPoolCreateInfo poolInfo = {};
     poolInfo.sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-    poolInfo.flags            = 0;
+    poolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
     poolInfo.queueFamilyIndex = mCurrentQueueFamilyIndex;
 
-    ANGLE_TRY(mCommandPool.init(context, poolInfo));
-
-    return angle::Result::Continue();
+    return mCommandPool.init(context, poolInfo);
 }
 
 bool RendererVk::isSerialInUse(Serial serial) const
@@ -1052,12 +1093,9 @@
     vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
     ANGLE_TRY(flushCommandGraph(context, &commandBatch.get()));
 
-    VkPipelineStageFlags waitStageMask[2] = {
-        VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
-    };
-
     angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores;
-    getSubmitWaitSemaphores(context, &waitSemaphores);
+    angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks;
+    getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks);
 
     // On every flush, create a semaphore to be signaled.  On the next submission, this semaphore
     // will be waited on.
@@ -1067,7 +1105,7 @@
     submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submitInfo.waitSemaphoreCount   = static_cast<uint32_t>(waitSemaphores.size());
     submitInfo.pWaitSemaphores      = waitSemaphores.data();
-    submitInfo.pWaitDstStageMask    = waitStageMask;
+    submitInfo.pWaitDstStageMask    = waitStageMasks.data();
     submitInfo.commandBufferCount   = 1;
     submitInfo.pCommandBuffers      = commandBatch.get().ptr();
     submitInfo.signalSemaphoreCount = 1;
@@ -1198,6 +1236,346 @@
     return &mShaderLibrary;
 }
 
+angle::Result RendererVk::synchronizeCpuGpuTime(vk::Context *context)
+{
+    ASSERT(mGpuEventsEnabled);
+
+    angle::PlatformMethods *platform = ANGLEPlatformCurrent();
+    ASSERT(platform);
+
+    // To synchronize CPU and GPU times, we need to get the CPU timestamp as close as possible to
+    // the GPU timestamp.  The process of getting the GPU timestamp is as follows:
+    //
+    //             CPU                            GPU
+    //
+    //     Record command buffer
+    //     with timestamp query
+    //
+    //     Submit command buffer
+    //
+    //     Post-submission work             Begin execution
+    //
+    //            ????                    Write timstamp Tgpu
+    //
+    //            ????                       End execution
+    //
+    //            ????                    Return query results
+    //
+    //            ????
+    //
+    //       Get query results
+    //
+    // The areas of unknown work (????) on the CPU indicate that the CPU may or may not have
+    // finished post-submission work while the GPU is executing in parallel. With no further work,
+    // querying CPU timestamps before submission and after getting query results give the bounds to
+    // Tgpu, which could be quite large.
+    //
+    // Using VkEvents, the GPU can be made to wait for the CPU and vice versa, in an effort to
+    // reduce this range. This function implements the following procedure:
+    //
+    //             CPU                            GPU
+    //
+    //     Record command buffer
+    //     with timestamp query
+    //
+    //     Submit command buffer
+    //
+    //     Post-submission work             Begin execution
+    //
+    //            ????                    Set Event GPUReady
+    //
+    //    Wait on Event GPUReady         Wait on Event CPUReady
+    //
+    //       Get CPU Time Ts             Wait on Event CPUReady
+    //
+    //      Set Event CPUReady           Wait on Event CPUReady
+    //
+    //      Get CPU Time Tcpu              Get GPU Time Tgpu
+    //
+    //    Wait on Event GPUDone            Set Event GPUDone
+    //
+    //       Get CPU Time Te                 End Execution
+    //
+    //            Idle                    Return query results
+    //
+    //      Get query results
+    //
+    // If Te-Ts > epsilon, a GPU or CPU interruption can be assumed and the operation can be
+    // retried.  Once Te-Ts < epsilon, Tcpu can be taken to presumably match Tgpu.  Finding an
+    // epsilon that's valid for all devices may be difficult, so the loop can be performed only a
+    // limited number of times and the Tcpu,Tgpu pair corresponding to smallest Te-Ts used for
+    // calibration.
+    //
+    // Note: Once VK_EXT_calibrated_timestamps is ubiquitous, this should be redone.
+
+    // Make sure nothing is running
+    ASSERT(mCommandGraph.empty());
+
+    TRACE_EVENT0("gpu.angle", "RendererVk::synchronizeCpuGpuTime");
+
+    // Create a query used to receive the GPU timestamp
+    vk::QueryHelper timestampQuery;
+    ANGLE_TRY(mGpuEventQueryPool.allocateQuery(context, &timestampQuery));
+
+    // Create the three events
+    VkEventCreateInfo eventCreateInfo = {};
+    eventCreateInfo.sType             = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+    eventCreateInfo.flags             = 0;
+
+    vk::Scoped<vk::Event> cpuReady(mDevice), gpuReady(mDevice), gpuDone(mDevice);
+    ANGLE_TRY(cpuReady.get().init(context, eventCreateInfo));
+    ANGLE_TRY(gpuReady.get().init(context, eventCreateInfo));
+    ANGLE_TRY(gpuDone.get().init(context, eventCreateInfo));
+
+    constexpr uint32_t kRetries = 10;
+
+    // Time suffixes used are S for seconds and Cycles for cycles
+    double tightestRangeS = 1e6f;
+    double TcpuS          = 0;
+    uint64_t TgpuCycles   = 0;
+    for (uint32_t i = 0; i < kRetries; ++i)
+    {
+        // Reset the events
+        ANGLE_TRY(cpuReady.get().reset(context));
+        ANGLE_TRY(gpuReady.get().reset(context));
+        ANGLE_TRY(gpuDone.get().reset(context));
+
+        // Record the command buffer
+        vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
+        vk::CommandBuffer &commandBuffer = commandBatch.get();
+
+        VkCommandBufferAllocateInfo commandBufferInfo = {};
+        commandBufferInfo.sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+        commandBufferInfo.commandPool        = mCommandPool.getHandle();
+        commandBufferInfo.level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+        commandBufferInfo.commandBufferCount = 1;
+
+        ANGLE_TRY(commandBuffer.init(context, commandBufferInfo));
+
+        VkCommandBufferBeginInfo beginInfo = {};
+        beginInfo.sType                    = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+        beginInfo.flags                    = 0;
+        beginInfo.pInheritanceInfo         = nullptr;
+
+        ANGLE_TRY(commandBuffer.begin(context, beginInfo));
+
+        commandBuffer.setEvent(gpuReady.get(), VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT);
+        commandBuffer.waitEvents(1, cpuReady.get().ptr(), VK_PIPELINE_STAGE_HOST_BIT,
+                                 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, 0, nullptr, 0,
+                                 nullptr);
+
+        commandBuffer.resetQueryPool(timestampQuery.getQueryPool()->getHandle(),
+                                     timestampQuery.getQuery(), 1);
+        commandBuffer.writeTimestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                     timestampQuery.getQueryPool()->getHandle(),
+                                     timestampQuery.getQuery());
+
+        commandBuffer.setEvent(gpuDone.get(), VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT);
+
+        ANGLE_TRY(commandBuffer.end(context));
+
+        // Submit the command buffer
+        angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores;
+        angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks;
+        getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks);
+
+        VkSubmitInfo submitInfo         = {};
+        submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+        submitInfo.waitSemaphoreCount   = static_cast<uint32_t>(waitSemaphores.size());
+        submitInfo.pWaitSemaphores      = waitSemaphores.data();
+        submitInfo.pWaitDstStageMask    = waitStageMasks.data();
+        submitInfo.commandBufferCount   = 1;
+        submitInfo.pCommandBuffers      = commandBuffer.ptr();
+        submitInfo.signalSemaphoreCount = 0;
+        submitInfo.pSignalSemaphores    = nullptr;
+
+        ANGLE_TRY(submitFrame(context, submitInfo, std::move(commandBuffer)));
+
+        // Wait for GPU to be ready.  This is a short busy wait.
+        angle::Result result = angle::Result::Incomplete();
+        do
+        {
+            result = gpuReady.get().getStatus(context);
+            ANGLE_TRY(result);
+        } while (result == angle::Result::Incomplete());
+
+        double TsS = platform->monotonicallyIncreasingTime(platform);
+
+        // Tell the GPU to go ahead with the timestamp query.
+        ANGLE_TRY(cpuReady.get().set(context));
+        double cpuTimestampS = platform->monotonicallyIncreasingTime(platform);
+
+        // Wait for GPU to be done.  Another short busy wait.
+        do
+        {
+            result = gpuDone.get().getStatus(context);
+            ANGLE_TRY(result);
+        } while (result == angle::Result::Incomplete());
+
+        double TeS = platform->monotonicallyIncreasingTime(platform);
+
+        // Get the query results
+        ANGLE_TRY(finishToSerial(context, getLastSubmittedQueueSerial()));
+
+        constexpr VkQueryResultFlags queryFlags = VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT;
+
+        uint64_t gpuTimestampCycles = 0;
+        ANGLE_TRY(timestampQuery.getQueryPool()->getResults(
+            context, timestampQuery.getQuery(), 1, sizeof(gpuTimestampCycles), &gpuTimestampCycles,
+            sizeof(gpuTimestampCycles), queryFlags));
+
+        // Use the first timestamp queried as origin.
+        if (mGpuEventTimestampOrigin == 0)
+        {
+            mGpuEventTimestampOrigin = gpuTimestampCycles;
+        }
+
+        // Take these CPU and GPU timestamps if there is better confidence.
+        double confidenceRangeS = TeS - TsS;
+        if (confidenceRangeS < tightestRangeS)
+        {
+            tightestRangeS = confidenceRangeS;
+            TcpuS          = cpuTimestampS;
+            TgpuCycles     = gpuTimestampCycles;
+        }
+    }
+
+    mGpuEventQueryPool.freeQuery(context, &timestampQuery);
+
+    // timestampPeriod gives nanoseconds/cycle.
+    double TgpuS = (TgpuCycles - mGpuEventTimestampOrigin) *
+                   static_cast<double>(mPhysicalDeviceProperties.limits.timestampPeriod) /
+                   1'000'000'000.0;
+
+    flushGpuEvents(TgpuS, TcpuS);
+
+    mGpuClockSync.gpuTimestampS = TgpuS;
+    mGpuClockSync.cpuTimestampS = TcpuS;
+
+    return angle::Result::Continue();
+}
+
+angle::Result RendererVk::traceGpuEventImpl(vk::Context *context,
+                                            vk::CommandBuffer *commandBuffer,
+                                            char phase,
+                                            const char *name)
+{
+    ASSERT(mGpuEventsEnabled);
+
+    GpuEventQuery event;
+
+    event.name   = name;
+    event.phase  = phase;
+    event.serial = mCurrentQueueSerial;
+
+    ANGLE_TRY(mGpuEventQueryPool.allocateQuery(context, &event.queryPoolIndex, &event.queryIndex));
+
+    commandBuffer->resetQueryPool(
+        mGpuEventQueryPool.getQueryPool(event.queryPoolIndex)->getHandle(), event.queryIndex, 1);
+    commandBuffer->writeTimestamp(
+        VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+        mGpuEventQueryPool.getQueryPool(event.queryPoolIndex)->getHandle(), event.queryIndex);
+
+    mInFlightGpuEventQueries.push_back(std::move(event));
+
+    return angle::Result::Continue();
+}
+
+angle::Result RendererVk::checkCompletedGpuEvents(vk::Context *context)
+{
+    ASSERT(mGpuEventsEnabled);
+
+    angle::PlatformMethods *platform = ANGLEPlatformCurrent();
+    ASSERT(platform);
+
+    int finishedCount = 0;
+
+    for (GpuEventQuery &eventQuery : mInFlightGpuEventQueries)
+    {
+        // Only check the timestamp query if the submission has finished.
+        if (eventQuery.serial > mLastCompletedQueueSerial)
+        {
+            break;
+        }
+
+        // See if the results are available.
+        uint64_t gpuTimestampCycles = 0;
+        angle::Result result        = mGpuEventQueryPool.getQueryPool(eventQuery.queryPoolIndex)
+                                   ->getResults(context, eventQuery.queryIndex, 1,
+                                                sizeof(gpuTimestampCycles), &gpuTimestampCycles,
+                                                sizeof(gpuTimestampCycles), VK_QUERY_RESULT_64_BIT);
+        ANGLE_TRY(result);
+
+        if (result == angle::Result::Incomplete())
+        {
+            break;
+        }
+
+        mGpuEventQueryPool.freeQuery(context, eventQuery.queryPoolIndex, eventQuery.queryIndex);
+
+        GpuEvent event;
+        event.gpuTimestampCycles = gpuTimestampCycles;
+        event.name               = eventQuery.name;
+        event.phase              = eventQuery.phase;
+
+        mGpuEvents.emplace_back(event);
+
+        ++finishedCount;
+    }
+
+    mInFlightGpuEventQueries.erase(mInFlightGpuEventQueries.begin(),
+                                   mInFlightGpuEventQueries.begin() + finishedCount);
+
+    return angle::Result::Continue();
+}
+
+void RendererVk::flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuTimestampS)
+{
+    if (mGpuEvents.size() == 0)
+    {
+        return;
+    }
+
+    angle::PlatformMethods *platform = ANGLEPlatformCurrent();
+    ASSERT(platform);
+
+    // Find the slope of the clock drift for adjustment
+    double lastGpuSyncTimeS  = mGpuClockSync.gpuTimestampS;
+    double lastGpuSyncDiffS  = mGpuClockSync.cpuTimestampS - mGpuClockSync.gpuTimestampS;
+    double gpuSyncDriftSlope = 0;
+
+    double nextGpuSyncTimeS = nextSyncGpuTimestampS;
+    double nextGpuSyncDiffS = nextSyncCpuTimestampS - nextSyncGpuTimestampS;
+
+    // No gpu trace events should have been generated before the clock sync, so if there is no
+    // "previous" clock sync, there should be no gpu events (i.e. the function early-outs above).
+    ASSERT(mGpuClockSync.gpuTimestampS != std::numeric_limits<double>::max() &&
+           mGpuClockSync.cpuTimestampS != std::numeric_limits<double>::max());
+
+    gpuSyncDriftSlope =
+        (nextGpuSyncDiffS - lastGpuSyncDiffS) / (nextGpuSyncTimeS - lastGpuSyncTimeS);
+
+    for (const GpuEvent &event : mGpuEvents)
+    {
+        double gpuTimestampS =
+            (event.gpuTimestampCycles - mGpuEventTimestampOrigin) *
+            static_cast<double>(mPhysicalDeviceProperties.limits.timestampPeriod) * 1e-9;
+
+        // Account for clock drift.
+        gpuTimestampS += lastGpuSyncDiffS + gpuSyncDriftSlope * (gpuTimestampS - lastGpuSyncTimeS);
+
+        // Generate the trace now that the GPU timestamp is available and clock drifts are accounted
+        // for.
+        static long long eventId = 1;
+        static const unsigned char *categoryEnabled =
+            TRACE_EVENT_API_GET_CATEGORY_ENABLED("gpu.angle.gpu");
+        platform->addTraceEvent(platform, event.phase, categoryEnabled, event.name, eventId++,
+                                gpuTimestampS, 0, nullptr, nullptr, nullptr, TRACE_EVENT_FLAG_NONE);
+    }
+
+    mGpuEvents.clear();
+}
+
 uint32_t GetUniformBufferDescriptorCount()
 {
     return kUniformBufferDescriptorsPerDescriptorSet;
commit	25224e786ffea77e8993daa8e8b9b236428cd6eb	[log] [tgz]
author	Shahbaz Youssefi <syoussefi@google.com>	Mon Oct 22 11:56:02 2018 -0400
committer	Commit Bot <commit-bot@chromium.org>	Mon Oct 29 15:28:41 2018 +0000
tree	a1cb9d13ba33bf8dcb46ebab7f97b9e42835b66f
parent	4a22f4b04619fd92138f4b37f861eebc2b0bfc6b [diff] [blame]