Vulkan: Adding custom pool allocator

Copied the pool allocator used by the compiler to common and hooked it
up as a custom allocator for CommandPools. Modified it to support
reallocation.

RendererVk now has a private poolAllocator and VkAllocationCallbacks
struct. The allocation callbacks are initialized to static functions
in RendererVk::initializeDevice() and then passed to CommandPool init()
and destroy() functions.

Using the pool allocator saves Command Pool/Buffer clean-up time, which
was showing up as a bottleneck in some cases.

Bug: angleproject:2951
Change-Id: I81aa8a7ec60397676fa722d6435029db27947ef4
Reviewed-on: https://chromium-review.googlesource.com/c/1409867
Commit-Queue: Tobin Ehlis <tobine@google.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index ede6f91..4a10454 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -458,6 +458,54 @@
 // Initially dumping the command graphs is disabled.
 constexpr bool kEnableCommandGraphDiagnostics = false;
 
+// Custom allocation functions
+VKAPI_ATTR void *VKAPI_CALL PoolAllocationFunction(void *pUserData,
+                                                   size_t size,
+                                                   size_t alignment,
+                                                   VkSystemAllocationScope allocationScope)
+{
+    angle::PoolAllocator *poolAllocator = static_cast<angle::PoolAllocator *>(pUserData);
+
+    ASSERT((angle::PoolAllocator::kDefaultAlignment % alignment) == 0);
+    return poolAllocator->allocate(size);
+}
+
+VKAPI_ATTR void *VKAPI_CALL PoolReallocationFunction(void *pUserData,
+                                                     void *pOriginal,
+                                                     size_t size,
+                                                     size_t alignment,
+                                                     VkSystemAllocationScope allocationScope)
+{
+    angle::PoolAllocator *poolAllocator = static_cast<angle::PoolAllocator *>(pUserData);
+    return poolAllocator->reallocate(pOriginal, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL PoolFreeFunction(void *pUserData, void *pMemory) {}
+
+VKAPI_ATTR void VKAPI_CALL
+PoolInternalAllocationNotification(void *pUserData,
+                                   size_t size,
+                                   VkInternalAllocationType allocationType,
+                                   VkSystemAllocationScope allocationScope)
+{}
+
+VKAPI_ATTR void VKAPI_CALL PoolInternalFreeNotification(void *pUserData,
+                                                        size_t size,
+                                                        VkInternalAllocationType allocationType,
+                                                        VkSystemAllocationScope allocationScope)
+{}
+
+void InitPoolAllocationCallbacks(angle::PoolAllocator *poolAllocator,
+                                 VkAllocationCallbacks *allocationCallbacks)
+{
+    allocationCallbacks->pUserData             = static_cast<void *>(poolAllocator);
+    allocationCallbacks->pfnAllocation         = &PoolAllocationFunction;
+    allocationCallbacks->pfnReallocation       = &PoolReallocationFunction;
+    allocationCallbacks->pfnFree               = &PoolFreeFunction;
+    allocationCallbacks->pfnInternalAllocation = &PoolInternalAllocationNotification;
+    allocationCallbacks->pfnInternalFree       = &PoolInternalFreeNotification;
+}
+
 }  // anonymous namespace
 
 // CommandBatch implementation.
@@ -466,20 +514,25 @@
 RendererVk::CommandBatch::~CommandBatch() = default;
 
 RendererVk::CommandBatch::CommandBatch(CommandBatch &&other)
-    : commandPool(std::move(other.commandPool)), fence(std::move(other.fence)), serial(other.serial)
+    : commandPool(std::move(other.commandPool)),
+      poolAllocator(std::move(other.poolAllocator)),
+      fence(std::move(other.fence)),
+      serial(other.serial)
 {}
 
 RendererVk::CommandBatch &RendererVk::CommandBatch::operator=(CommandBatch &&other)
 {
     std::swap(commandPool, other.commandPool);
+    std::swap(poolAllocator, other.poolAllocator);
     std::swap(fence, other.fence);
     std::swap(serial, other.serial);
     return *this;
 }
 
-void RendererVk::CommandBatch::destroy(VkDevice device)
+void RendererVk::CommandBatch::destroy(VkDevice device,
+                                       const VkAllocationCallbacks *allocationCallbacks)
 {
-    commandPool.destroy(device);
+    commandPool.destroy(device, allocationCallbacks);
     fence.destroy(device);
 }
 
@@ -535,7 +588,7 @@
 
     if (mCommandPool.valid())
     {
-        mCommandPool.destroy(mDevice);
+        mCommandPool.destroy(mDevice, &mAllocationCallbacks);
     }
 
     if (mDevice)
@@ -877,13 +930,14 @@
 
     vkGetDeviceQueue(mDevice, mCurrentQueueFamilyIndex, 0, &mQueue);
 
+    InitPoolAllocationCallbacks(&mPoolAllocator, &mAllocationCallbacks);
     // Initialize the command pool now that we know the queue family index.
     VkCommandPoolCreateInfo commandPoolInfo = {};
     commandPoolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     commandPoolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
     commandPoolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;
 
-    ANGLE_VK_TRY(displayVk, mCommandPool.init(mDevice, commandPoolInfo));
+    ANGLE_VK_TRY(displayVk, mCommandPool.init(mDevice, commandPoolInfo, &mAllocationCallbacks));
 
     // Initialize the vulkan pipeline cache.
     ANGLE_TRY(initPipelineCache(displayVk));
@@ -1234,7 +1288,7 @@
             ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
         }
         batch.fence.destroy(mDevice);
-        batch.commandPool.destroy(mDevice);
+        batch.commandPool.destroy(mDevice, &mAllocationCallbacks);
     }
     mInFlightCommands.clear();
 
@@ -1264,7 +1318,7 @@
         mLastCompletedQueueSerial = batch.serial;
 
         batch.fence.destroy(mDevice);
-        batch.commandPool.destroy(mDevice);
+        batch.commandPool.destroy(mDevice, &mAllocationCallbacks);
         ++finishedCount;
     }
 
@@ -1295,7 +1349,7 @@
     fenceInfo.sType             = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
     fenceInfo.flags             = 0;
 
-    vk::Scoped<CommandBatch> scopedBatch(mDevice);
+    vk::ScopedCustomAllocation<CommandBatch> scopedBatch(mDevice, &mAllocationCallbacks);
     CommandBatch &batch = scopedBatch.get();
     ANGLE_VK_TRY(context, batch.fence.init(mDevice, fenceInfo));
 
@@ -1303,6 +1357,7 @@
 
     // Store this command buffer in the in-flight list.
     batch.commandPool = std::move(mCommandPool);
+    batch.poolAllocator = std::move(mPoolAllocator);
     batch.serial      = mCurrentQueueSerial;
 
     mInFlightCommands.emplace_back(scopedBatch.release());
@@ -1327,12 +1382,13 @@
 
     // Reallocate the command pool for next frame.
     // TODO(jmadill): Consider reusing command pools.
+    InitPoolAllocationCallbacks(&mPoolAllocator, &mAllocationCallbacks);
     VkCommandPoolCreateInfo poolInfo = {};
     poolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     poolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
     poolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;
 
-    ANGLE_VK_TRY(context, mCommandPool.init(mDevice, poolInfo));
+    ANGLE_VK_TRY(context, mCommandPool.init(mDevice, poolInfo, &mAllocationCallbacks));
     return angle::Result::Continue;
 }
 
@@ -1675,7 +1731,7 @@
     //
     //     Post-submission work             Begin execution
     //
-    //            ????                    Write timstamp Tgpu
+    //            ????                    Write timestamp Tgpu
     //
     //            ????                       End execution
     //
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.h b/src/libANGLE/renderer/vulkan/RendererVk.h
index 8bd247e..73af881 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -13,6 +13,7 @@
 #include <vulkan/vulkan.h>
 #include <memory>
 
+#include "common/PoolAlloc.h"
 #include "common/angleutils.h"
 #include "libANGLE/BlobCache.h"
 #include "libANGLE/Caps.h"
@@ -255,6 +256,8 @@
     uint32_t mCurrentQueueFamilyIndex;
     VkDevice mDevice;
     vk::CommandPool mCommandPool;
+    angle::PoolAllocator mPoolAllocator;
+    VkAllocationCallbacks mAllocationCallbacks;
     SerialFactory mQueueSerialFactory;
     SerialFactory mShaderSerialFactory;
     Serial mLastCompletedQueueSerial;
@@ -270,9 +273,10 @@
         CommandBatch(CommandBatch &&other);
         CommandBatch &operator=(CommandBatch &&other);
 
-        void destroy(VkDevice device);
+        void destroy(VkDevice device, const VkAllocationCallbacks *allocationCallbacks);
 
         vk::CommandPool commandPool;
+        angle::PoolAllocator poolAllocator;
         vk::Fence fence;
         Serial serial;
     };
diff --git a/src/libANGLE/renderer/vulkan/vk_utils.cpp b/src/libANGLE/renderer/vulkan/vk_utils.cpp
index be20455..4b6e832 100644
--- a/src/libANGLE/renderer/vulkan/vk_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_utils.cpp
@@ -270,19 +270,21 @@
 // CommandPool implementation.
 CommandPool::CommandPool() {}
 
-void CommandPool::destroy(VkDevice device)
+void CommandPool::destroy(VkDevice device, const VkAllocationCallbacks *allocationCallbacks)
 {
     if (valid())
     {
-        vkDestroyCommandPool(device, mHandle, nullptr);
+        vkDestroyCommandPool(device, mHandle, allocationCallbacks);
         mHandle = VK_NULL_HANDLE;
     }
 }
 
-VkResult CommandPool::init(VkDevice device, const VkCommandPoolCreateInfo &createInfo)
+VkResult CommandPool::init(VkDevice device,
+                           const VkCommandPoolCreateInfo &createInfo,
+                           const VkAllocationCallbacks *allocationCallbacks)
 {
     ASSERT(!valid());
-    return vkCreateCommandPool(device, &createInfo, nullptr, &mHandle);
+    return vkCreateCommandPool(device, &createInfo, allocationCallbacks, &mHandle);
 }
 
 // CommandBuffer implementation.
diff --git a/src/libANGLE/renderer/vulkan/vk_utils.h b/src/libANGLE/renderer/vulkan/vk_utils.h
index 027e3eb..7c3b8e7 100644
--- a/src/libANGLE/renderer/vulkan/vk_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_utils.h
@@ -290,9 +290,11 @@
   public:
     CommandPool();
 
-    void destroy(VkDevice device);
+    void destroy(VkDevice device, const VkAllocationCallbacks *allocationCallbacks);
 
-    VkResult init(VkDevice device, const VkCommandPoolCreateInfo &createInfo);
+    VkResult init(VkDevice device,
+                  const VkCommandPoolCreateInfo &createInfo,
+                  const VkAllocationCallbacks *allocationCallbacks);
 };
 
 class Pipeline final : public WrappedObject<Pipeline, VkPipeline>
@@ -317,7 +319,7 @@
 
     VkCommandBuffer releaseHandle();
 
-    // This is used for normal pool allocated command buffers. It reset the handle.
+    // This is used for normal pool allocated command buffers. It resets the handle.
     void destroy(VkDevice device);
 
     // This is used in conjunction with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT.
@@ -791,6 +793,28 @@
     T mVar;
 };
 
+// Helper class to handle RAII patterns for initialization. Requires that T have a destroy method
+// that takes a VkDevice & VkAllocationCallbacks ptr and returns void.
+template <typename T>
+class ScopedCustomAllocation final : angle::NonCopyable
+{
+  public:
+    ScopedCustomAllocation(VkDevice device, const VkAllocationCallbacks *allocationCBs)
+        : mDevice(device), mAllocationCallbacks(allocationCBs)
+    {}
+    ~ScopedCustomAllocation() { mVar.destroy(mDevice, mAllocationCallbacks); }
+
+    const T &get() const { return mVar; }
+    T &get() { return mVar; }
+
+    T &&release() { return std::move(mVar); }
+
+  private:
+    VkDevice mDevice;
+    const VkAllocationCallbacks *mAllocationCallbacks;
+    T mVar;
+};
+
 // This is a very simple RefCount class that has no autoreleasing. Used in the descriptor set and
 // pipeline layout caches.
 template <typename T>