Vulkan: Adding custom pool allocator

Copied the pool allocator used by the compiler to common and hooked it
up as a custom allocator for CommandPools. Modified it to support
reallocation.

RendererVk now has a private poolAllocator and VkAllocationCallbacks
struct. The allocation callbacks are initialized to static functions
in RendererVk::initializeDevice() and then passed to CommandPool init()
and destroy() functions.

Using the pool allocator saves Command Pool/Buffer clean-up time, which
was showing up as a bottleneck in some cases.

Bug: angleproject:2951
Change-Id: I81aa8a7ec60397676fa722d6435029db27947ef4
Reviewed-on: https://chromium-review.googlesource.com/c/1409867
Commit-Queue: Tobin Ehlis <tobine@google.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index ede6f91..4a10454 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -458,6 +458,54 @@
 // Initially dumping the command graphs is disabled.
 constexpr bool kEnableCommandGraphDiagnostics = false;
 
+// Custom VkAllocationCallbacks functions backed by the angle::PoolAllocator passed in pUserData.
+VKAPI_ATTR void *VKAPI_CALL PoolAllocationFunction(void *pUserData,
+                                                   size_t size,
+                                                   size_t alignment,
+                                                   VkSystemAllocationScope allocationScope)
+{
+    angle::PoolAllocator *poolAllocator = static_cast<angle::PoolAllocator *>(pUserData);
+
+    ASSERT((angle::PoolAllocator::kDefaultAlignment % alignment) == 0);
+    return poolAllocator->allocate(size);
+}
+
+VKAPI_ATTR void *VKAPI_CALL PoolReallocationFunction(void *pUserData,
+                                                     void *pOriginal,
+                                                     size_t size,
+                                                     size_t alignment,
+                                                     VkSystemAllocationScope allocationScope)
+{
+    angle::PoolAllocator *poolAllocator = static_cast<angle::PoolAllocator *>(pUserData);
+    return poolAllocator->reallocate(pOriginal, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL PoolFreeFunction(void *pUserData, void *pMemory) {}
+
+VKAPI_ATTR void VKAPI_CALL
+PoolInternalAllocationNotification(void *pUserData,
+                                   size_t size,
+                                   VkInternalAllocationType allocationType,
+                                   VkSystemAllocationScope allocationScope)
+{}
+
+VKAPI_ATTR void VKAPI_CALL PoolInternalFreeNotification(void *pUserData,
+                                                        size_t size,
+                                                        VkInternalAllocationType allocationType,
+                                                        VkSystemAllocationScope allocationScope)
+{}
+
+void InitPoolAllocationCallbacks(angle::PoolAllocator *poolAllocator,
+                                 VkAllocationCallbacks *allocationCallbacks)
+{
+    allocationCallbacks->pUserData             = static_cast<void *>(poolAllocator);
+    allocationCallbacks->pfnAllocation         = &PoolAllocationFunction;
+    allocationCallbacks->pfnReallocation       = &PoolReallocationFunction;
+    allocationCallbacks->pfnFree               = &PoolFreeFunction;
+    allocationCallbacks->pfnInternalAllocation = &PoolInternalAllocationNotification;
+    allocationCallbacks->pfnInternalFree       = &PoolInternalFreeNotification;
+}
+
 }  // anonymous namespace
 
 // CommandBatch implementation.
@@ -466,20 +514,25 @@
 RendererVk::CommandBatch::~CommandBatch() = default;
 
 RendererVk::CommandBatch::CommandBatch(CommandBatch &&other)
-    : commandPool(std::move(other.commandPool)), fence(std::move(other.fence)), serial(other.serial)
+    : commandPool(std::move(other.commandPool)),
+      poolAllocator(std::move(other.poolAllocator)),
+      fence(std::move(other.fence)),
+      serial(other.serial)
 {}
 
 RendererVk::CommandBatch &RendererVk::CommandBatch::operator=(CommandBatch &&other)
 {
     std::swap(commandPool, other.commandPool);
+    std::swap(poolAllocator, other.poolAllocator);
     std::swap(fence, other.fence);
     std::swap(serial, other.serial);
     return *this;
 }
 
-void RendererVk::CommandBatch::destroy(VkDevice device)
+void RendererVk::CommandBatch::destroy(VkDevice device,
+                                       const VkAllocationCallbacks *allocationCallbacks)
 {
-    commandPool.destroy(device);
+    commandPool.destroy(device, allocationCallbacks);
     fence.destroy(device);
 }
 
@@ -535,7 +588,7 @@
 
     if (mCommandPool.valid())
     {
-        mCommandPool.destroy(mDevice);
+        mCommandPool.destroy(mDevice, &mAllocationCallbacks);
     }
 
     if (mDevice)
@@ -877,13 +930,14 @@
 
     vkGetDeviceQueue(mDevice, mCurrentQueueFamilyIndex, 0, &mQueue);
 
+    InitPoolAllocationCallbacks(&mPoolAllocator, &mAllocationCallbacks);
     // Initialize the command pool now that we know the queue family index.
     VkCommandPoolCreateInfo commandPoolInfo = {};
     commandPoolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     commandPoolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
     commandPoolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;
 
-    ANGLE_VK_TRY(displayVk, mCommandPool.init(mDevice, commandPoolInfo));
+    ANGLE_VK_TRY(displayVk, mCommandPool.init(mDevice, commandPoolInfo, &mAllocationCallbacks));
 
     // Initialize the vulkan pipeline cache.
     ANGLE_TRY(initPipelineCache(displayVk));
@@ -1234,7 +1288,7 @@
             ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
         }
         batch.fence.destroy(mDevice);
-        batch.commandPool.destroy(mDevice);
+        batch.commandPool.destroy(mDevice, &mAllocationCallbacks);
     }
     mInFlightCommands.clear();
 
@@ -1264,7 +1318,7 @@
         mLastCompletedQueueSerial = batch.serial;
 
         batch.fence.destroy(mDevice);
-        batch.commandPool.destroy(mDevice);
+        batch.commandPool.destroy(mDevice, &mAllocationCallbacks);
         ++finishedCount;
     }
 
@@ -1295,7 +1349,7 @@
     fenceInfo.sType             = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
     fenceInfo.flags             = 0;
 
-    vk::Scoped<CommandBatch> scopedBatch(mDevice);
+    vk::ScopedCustomAllocation<CommandBatch> scopedBatch(mDevice, &mAllocationCallbacks);
     CommandBatch &batch = scopedBatch.get();
     ANGLE_VK_TRY(context, batch.fence.init(mDevice, fenceInfo));
 
@@ -1303,6 +1357,7 @@
 
     // Store this command buffer in the in-flight list.
     batch.commandPool = std::move(mCommandPool);
+    batch.poolAllocator = std::move(mPoolAllocator);
     batch.serial      = mCurrentQueueSerial;
 
     mInFlightCommands.emplace_back(scopedBatch.release());
@@ -1327,12 +1382,13 @@
 
     // Reallocate the command pool for next frame.
     // TODO(jmadill): Consider reusing command pools.
+    InitPoolAllocationCallbacks(&mPoolAllocator, &mAllocationCallbacks);
     VkCommandPoolCreateInfo poolInfo = {};
     poolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     poolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
     poolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;
 
-    ANGLE_VK_TRY(context, mCommandPool.init(mDevice, poolInfo));
+    ANGLE_VK_TRY(context, mCommandPool.init(mDevice, poolInfo, &mAllocationCallbacks));
     return angle::Result::Continue;
 }
 
@@ -1675,7 +1731,7 @@
     //
     //     Post-submission work             Begin execution
     //
-    //            ????                    Write timstamp Tgpu
+    //            ????                    Write timestamp Tgpu
     //
     //            ????                       End execution
     //