Vulkan: Adding custom pool allocator

Copied the pool allocator used by the compiler to common and hooked it
up as a custom allocator for CommandPools. Modified it to support
reallocation.

RendererVk now has a private poolAllocator and VkAllocationCallbacks
struct. The allocation callbacks are initialized to static functions
in RendererVk::initializeDevice() and then passed to CommandPool init()
and destroy() functions.

Using the pool allocator saves Command Pool/Buffer clean-up time which
was showing up as a bottleneck in some cases.

Bug: angleproject:2951
Change-Id: I81aa8a7ec60397676fa722d6435029db27947ef4
Reviewed-on: https://chromium-review.googlesource.com/c/1409867
Commit-Queue: Tobin Ehlis <tobine@google.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/compiler/translator/Compiler.cpp b/src/compiler/translator/Compiler.cpp
index 95082d9..ff6c5d0 100644
--- a/src/compiler/translator/Compiler.cpp
+++ b/src/compiler/translator/Compiler.cpp
@@ -170,7 +170,7 @@
 class TScopedPoolAllocator
 {
   public:
-    TScopedPoolAllocator(TPoolAllocator *allocator) : mAllocator(allocator)
+    TScopedPoolAllocator(angle::PoolAllocator *allocator) : mAllocator(allocator)
     {
         mAllocator->push();
         SetGlobalPoolAllocator(mAllocator);
@@ -182,7 +182,7 @@
     }
 
   private:
-    TPoolAllocator *mAllocator;
+    angle::PoolAllocator *mAllocator;
 };
 
 class TScopedSymbolTableLevel
diff --git a/src/compiler/translator/Compiler.h b/src/compiler/translator/Compiler.h
index 2eef387..eef55d8 100644
--- a/src/compiler/translator/Compiler.h
+++ b/src/compiler/translator/Compiler.h
@@ -66,7 +66,7 @@
   protected:
     // Memory allocator. Allocates and tracks memory required by the compiler.
     // Deallocates all memory when compiler is destructed.
-    TPoolAllocator allocator;
+    angle::PoolAllocator allocator;
 };
 
 //
diff --git a/src/compiler/translator/PoolAlloc.cpp b/src/compiler/translator/PoolAlloc.cpp
index 5ad10c2..7a95658 100644
--- a/src/compiler/translator/PoolAlloc.cpp
+++ b/src/compiler/translator/PoolAlloc.cpp
@@ -7,14 +7,7 @@
 #include "compiler/translator/PoolAlloc.h"
 
 #include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#include "common/angleutils.h"
-#include "common/debug.h"
-#include "common/platform.h"
 #include "common/tls.h"
-#include "compiler/translator/InitializeGlobals.h"
 
 TLSIndex PoolIndex = TLS_INVALID_INDEX;
 
@@ -34,327 +27,14 @@
     PoolIndex = TLS_INVALID_INDEX;
 }
 
-TPoolAllocator *GetGlobalPoolAllocator()
+angle::PoolAllocator *GetGlobalPoolAllocator()
 {
     assert(PoolIndex != TLS_INVALID_INDEX);
-    return static_cast<TPoolAllocator *>(GetTLSValue(PoolIndex));
+    return static_cast<angle::PoolAllocator *>(GetTLSValue(PoolIndex));
 }
 
-void SetGlobalPoolAllocator(TPoolAllocator *poolAllocator)
+void SetGlobalPoolAllocator(angle::PoolAllocator *poolAllocator)
 {
     assert(PoolIndex != TLS_INVALID_INDEX);
     SetTLSValue(PoolIndex, poolAllocator);
 }
-
-//
-// Implement the functionality of the TPoolAllocator class, which
-// is documented in PoolAlloc.h.
-//
-TPoolAllocator::TPoolAllocator(int growthIncrement, int allocationAlignment)
-    : alignment(allocationAlignment),
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-      pageSize(growthIncrement),
-      freeList(0),
-      inUseList(0),
-      numCalls(0),
-      totalBytes(0),
-#endif
-      mLocked(false)
-{
-    //
-    // Adjust alignment to be at least pointer aligned and
-    // power of 2.
-    //
-    size_t minAlign = sizeof(void *);
-    alignment &= ~(minAlign - 1);
-    if (alignment < minAlign)
-        alignment = minAlign;
-    size_t a = 1;
-    while (a < alignment)
-        a <<= 1;
-    alignment     = a;
-    alignmentMask = a - 1;
-
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    //
-    // Don't allow page sizes we know are smaller than all common
-    // OS page sizes.
-    //
-    if (pageSize < 4 * 1024)
-        pageSize = 4 * 1024;
-
-    //
-    // A large currentPageOffset indicates a new page needs to
-    // be obtained to allocate memory.
-    //
-    currentPageOffset = pageSize;
-
-    //
-    // Align header skip
-    //
-    headerSkip = minAlign;
-    if (headerSkip < sizeof(tHeader))
-    {
-        headerSkip = (sizeof(tHeader) + alignmentMask) & ~alignmentMask;
-    }
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    mStack.push_back({});
-#endif
-}
-
-TPoolAllocator::~TPoolAllocator()
-{
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    while (inUseList)
-    {
-        tHeader *next = inUseList->nextPage;
-        inUseList->~tHeader();
-        delete[] reinterpret_cast<char *>(inUseList);
-        inUseList = next;
-    }
-
-    // We should not check the guard blocks
-    // here, because we did it already when the block was
-    // placed into the free list.
-    //
-    while (freeList)
-    {
-        tHeader *next = freeList->nextPage;
-        delete[] reinterpret_cast<char *>(freeList);
-        freeList = next;
-    }
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    for (auto &allocs : mStack)
-    {
-        for (auto alloc : allocs)
-        {
-            free(alloc);
-        }
-    }
-    mStack.clear();
-#endif
-}
-
-// Support MSVC++ 6.0
-const unsigned char TAllocation::guardBlockBeginVal = 0xfb;
-const unsigned char TAllocation::guardBlockEndVal   = 0xfe;
-const unsigned char TAllocation::userDataFill       = 0xcd;
-
-#ifdef GUARD_BLOCKS
-const size_t TAllocation::guardBlockSize = 16;
-#else
-const size_t TAllocation::guardBlockSize = 0;
-#endif
-
-//
-// Check a single guard block for damage
-//
-void TAllocation::checkGuardBlock(unsigned char *blockMem,
-                                  unsigned char val,
-                                  const char *locText) const
-{
-#ifdef GUARD_BLOCKS
-    for (size_t x = 0; x < guardBlockSize; x++)
-    {
-        if (blockMem[x] != val)
-        {
-            char assertMsg[80];
-
-// We don't print the assert message.  It's here just to be helpful.
-#    if defined(_MSC_VER)
-            snprintf(assertMsg, sizeof(assertMsg),
-                     "PoolAlloc: Damage %s %Iu byte allocation at 0x%p\n", locText, size, data());
-#    else
-            snprintf(assertMsg, sizeof(assertMsg),
-                     "PoolAlloc: Damage %s %zu byte allocation at 0x%p\n", locText, size, data());
-#    endif
-            assert(0 && "PoolAlloc: Damage in guard block");
-        }
-    }
-#endif
-}
-
-void TPoolAllocator::push()
-{
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    tAllocState state = {currentPageOffset, inUseList};
-
-    mStack.push_back(state);
-
-    //
-    // Indicate there is no current page to allocate from.
-    //
-    currentPageOffset = pageSize;
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    mStack.push_back({});
-#endif
-}
-
-//
-// Do a mass-deallocation of all the individual allocations
-// that have occurred since the last push(), or since the
-// last pop(), or since the object's creation.
-//
-// The deallocated pages are saved for future allocations.
-//
-void TPoolAllocator::pop()
-{
-    if (mStack.size() < 1)
-        return;
-
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    tHeader *page     = mStack.back().page;
-    currentPageOffset = mStack.back().offset;
-
-    while (inUseList != page)
-    {
-        // invoke destructor to free allocation list
-        inUseList->~tHeader();
-
-        tHeader *nextInUse = inUseList->nextPage;
-        if (inUseList->pageCount > 1)
-            delete[] reinterpret_cast<char *>(inUseList);
-        else
-        {
-            inUseList->nextPage = freeList;
-            freeList            = inUseList;
-        }
-        inUseList = nextInUse;
-    }
-
-    mStack.pop_back();
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    for (auto &alloc : mStack.back())
-    {
-        free(alloc);
-    }
-    mStack.pop_back();
-#endif
-}
-
-//
-// Do a mass-deallocation of all the individual allocations
-// that have occurred.
-//
-void TPoolAllocator::popAll()
-{
-    while (mStack.size() > 0)
-        pop();
-}
-
-void *TPoolAllocator::allocate(size_t numBytes)
-{
-    ASSERT(!mLocked);
-
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    //
-    // Just keep some interesting statistics.
-    //
-    ++numCalls;
-    totalBytes += numBytes;
-
-    // If we are using guard blocks, all allocations are bracketed by
-    // them: [guardblock][allocation][guardblock].  numBytes is how
-    // much memory the caller asked for.  allocationSize is the total
-    // size including guard blocks.  In release build,
-    // guardBlockSize=0 and this all gets optimized away.
-    size_t allocationSize = TAllocation::allocationSize(numBytes);
-    // Detect integer overflow.
-    if (allocationSize < numBytes)
-        return 0;
-
-    //
-    // Do the allocation, most likely case first, for efficiency.
-    // This step could be moved to be inline sometime.
-    //
-    if (allocationSize <= pageSize - currentPageOffset)
-    {
-        //
-        // Safe to allocate from currentPageOffset.
-        //
-        unsigned char *memory = reinterpret_cast<unsigned char *>(inUseList) + currentPageOffset;
-        currentPageOffset += allocationSize;
-        currentPageOffset = (currentPageOffset + alignmentMask) & ~alignmentMask;
-
-        return initializeAllocation(inUseList, memory, numBytes);
-    }
-
-    if (allocationSize > pageSize - headerSkip)
-    {
-        //
-        // Do a multi-page allocation.  Don't mix these with the others.
-        // The OS is efficient and allocating and free-ing multiple pages.
-        //
-        size_t numBytesToAlloc = allocationSize + headerSkip;
-        // Detect integer overflow.
-        if (numBytesToAlloc < allocationSize)
-            return 0;
-
-        tHeader *memory = reinterpret_cast<tHeader *>(::new char[numBytesToAlloc]);
-        if (memory == 0)
-            return 0;
-
-        // Use placement-new to initialize header
-        new (memory) tHeader(inUseList, (numBytesToAlloc + pageSize - 1) / pageSize);
-        inUseList = memory;
-
-        currentPageOffset = pageSize;  // make next allocation come from a new page
-
-        // No guard blocks for multi-page allocations (yet)
-        return reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(memory) + headerSkip);
-    }
-
-    //
-    // Need a simple page to allocate from.
-    //
-    tHeader *memory;
-    if (freeList)
-    {
-        memory   = freeList;
-        freeList = freeList->nextPage;
-    }
-    else
-    {
-        memory = reinterpret_cast<tHeader *>(::new char[pageSize]);
-        if (memory == 0)
-            return 0;
-    }
-
-    // Use placement-new to initialize header
-    new (memory) tHeader(inUseList, 1);
-    inUseList = memory;
-
-    unsigned char *ret = reinterpret_cast<unsigned char *>(inUseList) + headerSkip;
-    currentPageOffset  = (headerSkip + allocationSize + alignmentMask) & ~alignmentMask;
-
-    return initializeAllocation(inUseList, ret, numBytes);
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    void *alloc = malloc(numBytes + alignmentMask);
-    mStack.back().push_back(alloc);
-
-    intptr_t intAlloc = reinterpret_cast<intptr_t>(alloc);
-    intAlloc = (intAlloc + alignmentMask) & ~alignmentMask;
-    return reinterpret_cast<void *>(intAlloc);
-#endif
-}
-
-void TPoolAllocator::lock()
-{
-    ASSERT(!mLocked);
-    mLocked = true;
-}
-
-void TPoolAllocator::unlock()
-{
-    ASSERT(mLocked);
-    mLocked = false;
-}
-
-//
-// Check all allocations in a list for damage by calling check on each.
-//
-void TAllocation::checkAllocList() const
-{
-    for (const TAllocation *alloc = this; alloc != 0; alloc = alloc->prevAlloc)
-        alloc->check();
-}
diff --git a/src/compiler/translator/PoolAlloc.h b/src/compiler/translator/PoolAlloc.h
index 56c1d0f..d25b7b2 100644
--- a/src/compiler/translator/PoolAlloc.h
+++ b/src/compiler/translator/PoolAlloc.h
@@ -12,237 +12,26 @@
 #endif
 
 //
-// This header defines an allocator that can be used to efficiently
-// allocate a large number of small requests for heap memory, with the
-// intention that they are not individually deallocated, but rather
-// collectively deallocated at one time.
-//
-// This simultaneously
-//
-// * Makes each individual allocation much more efficient; the
-//     typical allocation is trivial.
-// * Completely avoids the cost of doing individual deallocation.
-// * Saves the trouble of tracking down and plugging a large class of leaks.
-//
-// Individual classes can use this allocator by supplying their own
-// new and delete methods.
-//
-// STL containers can use this allocator by using the pool_allocator
+// This header defines the pool_allocator class that allows STL containers
+// to use the angle::PoolAllocator class by using the pool_allocator
 // class as the allocator (second) template argument.
 //
+// It also defines functions for managing the GlobalPoolAllocator used by the compiler.
+//
 
 #include <stddef.h>
 #include <string.h>
 #include <vector>
 
-// If we are using guard blocks, we must track each indivual
-// allocation.  If we aren't using guard blocks, these
-// never get instantiated, so won't have any impact.
-//
-
-class TAllocation
-{
-  public:
-    TAllocation(size_t size, unsigned char *mem, TAllocation *prev = 0)
-        : size(size), mem(mem), prevAlloc(prev)
-    {
-// Allocations are bracketed:
-//    [allocationHeader][initialGuardBlock][userData][finalGuardBlock]
-// This would be cleaner with if (guardBlockSize)..., but that
-// makes the compiler print warnings about 0 length memsets,
-// even with the if() protecting them.
-#ifdef GUARD_BLOCKS
-        memset(preGuard(), guardBlockBeginVal, guardBlockSize);
-        memset(data(), userDataFill, size);
-        memset(postGuard(), guardBlockEndVal, guardBlockSize);
-#endif
-    }
-
-    void check() const
-    {
-        checkGuardBlock(preGuard(), guardBlockBeginVal, "before");
-        checkGuardBlock(postGuard(), guardBlockEndVal, "after");
-    }
-
-    void checkAllocList() const;
-
-    // Return total size needed to accomodate user buffer of 'size',
-    // plus our tracking data.
-    inline static size_t allocationSize(size_t size)
-    {
-        return size + 2 * guardBlockSize + headerSize();
-    }
-
-    // Offset from surrounding buffer to get to user data buffer.
-    inline static unsigned char *offsetAllocation(unsigned char *m)
-    {
-        return m + guardBlockSize + headerSize();
-    }
-
-  private:
-    void checkGuardBlock(unsigned char *blockMem, unsigned char val, const char *locText) const;
-
-    // Find offsets to pre and post guard blocks, and user data buffer
-    unsigned char *preGuard() const { return mem + headerSize(); }
-    unsigned char *data() const { return preGuard() + guardBlockSize; }
-    unsigned char *postGuard() const { return data() + size; }
-
-    size_t size;             // size of the user data area
-    unsigned char *mem;      // beginning of our allocation (pts to header)
-    TAllocation *prevAlloc;  // prior allocation in the chain
-
-    // Support MSVC++ 6.0
-    const static unsigned char guardBlockBeginVal;
-    const static unsigned char guardBlockEndVal;
-    const static unsigned char userDataFill;
-
-    const static size_t guardBlockSize;
-#ifdef GUARD_BLOCKS
-    inline static size_t headerSize() { return sizeof(TAllocation); }
-#else
-    inline static size_t headerSize() { return 0; }
-#endif
-};
-
-//
-// There are several stacks.  One is to track the pushing and popping
-// of the user, and not yet implemented.  The others are simply a
-// repositories of free pages or used pages.
-//
-// Page stacks are linked together with a simple header at the beginning
-// of each allocation obtained from the underlying OS.  Multi-page allocations
-// are returned to the OS.  Individual page allocations are kept for future
-// re-use.
-//
-// The "page size" used is not, nor must it match, the underlying OS
-// page size.  But, having it be about that size or equal to a set of
-// pages is likely most optimal.
-//
-class TPoolAllocator
-{
-  public:
-    TPoolAllocator(int growthIncrement = 8 * 1024, int allocationAlignment = 16);
-
-    //
-    // Don't call the destructor just to free up the memory, call pop()
-    //
-    ~TPoolAllocator();
-
-    //
-    // Call push() to establish a new place to pop memory too.  Does not
-    // have to be called to get things started.
-    //
-    void push();
-
-    //
-    // Call pop() to free all memory allocated since the last call to push(),
-    // or if no last call to push, frees all memory since first allocation.
-    //
-    void pop();
-
-    //
-    // Call popAll() to free all memory allocated.
-    //
-    void popAll();
-
-    //
-    // Call allocate() to actually acquire memory.  Returns 0 if no memory
-    // available, otherwise a properly aligned pointer to 'numBytes' of memory.
-    //
-    void *allocate(size_t numBytes);
-
-    //
-    // There is no deallocate.  The point of this class is that
-    // deallocation can be skipped by the user of it, as the model
-    // of use is to simultaneously deallocate everything at once
-    // by calling pop(), and to not have to solve memory leak problems.
-    //
-
-    // Catch unwanted allocations.
-    // TODO(jmadill): Remove this when we remove the global allocator.
-    void lock();
-    void unlock();
-
-  private:
-    size_t alignment;  // all returned allocations will be aligned at
-                       // this granularity, which will be a power of 2
-    size_t alignmentMask;
-
-#if !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    friend struct tHeader;
-
-    struct tHeader
-    {
-        tHeader(tHeader *nextPage, size_t pageCount)
-            : nextPage(nextPage),
-              pageCount(pageCount)
-#    ifdef GUARD_BLOCKS
-              ,
-              lastAllocation(0)
-#    endif
-        {}
-
-        ~tHeader()
-        {
-#    ifdef GUARD_BLOCKS
-            if (lastAllocation)
-                lastAllocation->checkAllocList();
-#    endif
-        }
-
-        tHeader *nextPage;
-        size_t pageCount;
-#    ifdef GUARD_BLOCKS
-        TAllocation *lastAllocation;
-#    endif
-    };
-
-    struct tAllocState
-    {
-        size_t offset;
-        tHeader *page;
-    };
-    typedef std::vector<tAllocState> tAllocStack;
-
-    // Track allocations if and only if we're using guard blocks
-    void *initializeAllocation(tHeader *block, unsigned char *memory, size_t numBytes)
-    {
-#    ifdef GUARD_BLOCKS
-        new (memory) TAllocation(numBytes, memory, block->lastAllocation);
-        block->lastAllocation = reinterpret_cast<TAllocation *>(memory);
-#    endif
-        // This is optimized entirely away if GUARD_BLOCKS is not defined.
-        return TAllocation::offsetAllocation(memory);
-    }
-
-    size_t pageSize;           // granularity of allocation from the OS
-    size_t headerSkip;         // amount of memory to skip to make room for the
-                               //      header (basically, size of header, rounded
-                               //      up to make it aligned
-    size_t currentPageOffset;  // next offset in top of inUseList to allocate from
-    tHeader *freeList;         // list of popped memory
-    tHeader *inUseList;        // list of all memory currently being used
-    tAllocStack mStack;        // stack of where to allocate from, to partition pool
-
-    int numCalls;       // just an interesting statistic
-    size_t totalBytes;  // just an interesting statistic
-
-#else  // !defined(ANGLE_TRANSLATOR_DISABLE_POOL_ALLOC)
-    std::vector<std::vector<void *>> mStack;
-#endif
-
-    TPoolAllocator &operator=(const TPoolAllocator &);  // dont allow assignment operator
-    TPoolAllocator(const TPoolAllocator &);             // dont allow default copy constructor
-    bool mLocked;
-};
+#include "common/PoolAlloc.h"
 
 //
 // There could potentially be many pools with pops happening at
 // different times.  But a simple use is to have a global pop
 // with everyone using the same global allocator.
 //
-extern TPoolAllocator *GetGlobalPoolAllocator();
-extern void SetGlobalPoolAllocator(TPoolAllocator *poolAllocator);
+extern angle::PoolAllocator *GetGlobalPoolAllocator();
+extern void SetGlobalPoolAllocator(angle::PoolAllocator *poolAllocator);
 
 //
 // This STL compatible allocator is intended to be used as the allocator
@@ -311,7 +100,7 @@
     size_type max_size() const { return static_cast<size_type>(-1) / sizeof(T); }
     size_type max_size(int size) const { return static_cast<size_type>(-1) / size; }
 
-    TPoolAllocator &getAllocator() const { return *GetGlobalPoolAllocator(); }
+    angle::PoolAllocator &getAllocator() const { return *GetGlobalPoolAllocator(); }
 };
 
 #endif  // COMPILER_TRANSLATOR_POOLALLOC_H_