Cache CPU memory buffers used for client side arrays.
Use same cache for CPU-side copy of data when using GPU buffers.
Change-Id: I09f2837211a30aabc50e9897c090f5fbc6d90492
Reviewed-on: https://skia-review.googlesource.com/c/189484
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/src/gpu/GrBufferAllocPool.cpp b/src/gpu/GrBufferAllocPool.cpp
index cf986bc..36c0431 100644
--- a/src/gpu/GrBufferAllocPool.cpp
+++ b/src/gpu/GrBufferAllocPool.cpp
@@ -18,6 +18,56 @@
#include "SkSafeMath.h"
#include "SkTraceEvent.h"
+sk_sp<GrBufferAllocPool::CpuBufferCache> GrBufferAllocPool::CpuBufferCache::Make(
+ int maxBuffersToCache) {
+ return sk_sp<CpuBufferCache>(new CpuBufferCache(maxBuffersToCache));
+}
+
+GrBufferAllocPool::CpuBufferCache::CpuBufferCache(int maxBuffersToCache)
+ : fMaxBuffersToCache(maxBuffersToCache) {
+ if (fMaxBuffersToCache) {
+ fBuffers.reset(new Buffer[fMaxBuffersToCache]);
+ }
+}
+
+sk_sp<GrCpuBuffer> GrBufferAllocPool::CpuBufferCache::makeBuffer(size_t size,
+ bool mustBeInitialized) {
+ SkASSERT(size > 0);
+ Buffer* result = nullptr;
+ if (size == kDefaultBufferSize) {
+ int i = 0;
+ for (; i < fMaxBuffersToCache && fBuffers[i].fBuffer; ++i) {
+ SkASSERT(fBuffers[i].fBuffer->size() == kDefaultBufferSize);
+ if (fBuffers[i].fBuffer->unique()) {
+ result = &fBuffers[i];
+ }
+ }
+ if (!result && i < fMaxBuffersToCache) {
+ fBuffers[i].fBuffer = GrCpuBuffer::Make(size);
+ result = &fBuffers[i];
+ }
+ }
+ Buffer tempResult;
+ if (!result) {
+ tempResult.fBuffer = GrCpuBuffer::Make(size);
+ result = &tempResult;
+ }
+ if (mustBeInitialized && !result->fCleared) {
+ result->fCleared = true;
+ memset(result->fBuffer->data(), 0, result->fBuffer->size());
+ }
+ return result->fBuffer;
+}
+
+void GrBufferAllocPool::CpuBufferCache::releaseAll() {
+ for (int i = 0; i < fMaxBuffersToCache && fBuffers[i].fBuffer; ++i) {
+ fBuffers[i].fBuffer.reset();
+ fBuffers[i].fCleared = false;
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
#ifdef SK_DEBUG
#define VALIDATE validate
#else
@@ -35,13 +85,12 @@
constexpr size_t GrBufferAllocPool::kDefaultBufferSize;
-GrBufferAllocPool::GrBufferAllocPool(GrGpu* gpu, GrGpuBufferType bufferType, void* initialBuffer)
- : fBlocks(8), fGpu(gpu), fBufferType(bufferType), fInitialCpuData(initialBuffer) {
- if (fInitialCpuData) {
- fCpuDataSize = kDefaultBufferSize;
- fCpuData = fInitialCpuData;
- }
-}
+GrBufferAllocPool::GrBufferAllocPool(GrGpu* gpu, GrGpuBufferType bufferType,
+ sk_sp<CpuBufferCache> cpuBufferCache)
+ : fBlocks(8)
+ , fCpuBufferCache(std::move(cpuBufferCache))
+ , fGpu(gpu)
+ , fBufferType(bufferType) {}
void GrBufferAllocPool::deleteBlocks() {
if (fBlocks.count()) {
@@ -59,9 +108,6 @@
GrBufferAllocPool::~GrBufferAllocPool() {
VALIDATE();
this->deleteBlocks();
- if (fCpuData != fInitialCpuData) {
- sk_free(fCpuData);
- }
}
void GrBufferAllocPool::reset() {
@@ -98,7 +144,7 @@
SkASSERT(!fBlocks.empty());
const GrBuffer* buffer = fBlocks.back().fBuffer.get();
if (!buffer->isCpuBuffer() && !static_cast<const GrGpuBuffer*>(buffer)->isMapped()) {
- SkASSERT(fCpuData == fBufferPtr);
+ SkASSERT(fCpuStagingBuffer && fCpuStagingBuffer->data() == fBufferPtr);
}
} else if (!fBlocks.empty()) {
const GrBuffer* buffer = fBlocks.back().fBuffer.get();
@@ -321,7 +367,8 @@
}
}
if (!fBufferPtr) {
- fBufferPtr = this->resetCpuData(block.fBytesFree);
+ this->resetCpuData(block.fBytesFree);
+ fBufferPtr = fCpuStagingBuffer->data();
}
VALIDATE(true);
@@ -337,30 +384,26 @@
fBufferPtr = nullptr;
}
-void* GrBufferAllocPool::resetCpuData(size_t newSize) {
- if (newSize <= fCpuDataSize) {
- SkASSERT(!newSize || fCpuData);
- return fCpuData;
+void GrBufferAllocPool::resetCpuData(size_t newSize) {
+ SkASSERT(newSize >= kDefaultBufferSize || !newSize);
+ if (!newSize) {
+ fCpuStagingBuffer.reset();
+ return;
}
- if (fCpuData != fInitialCpuData) {
- sk_free(fCpuData);
+ if (fCpuStagingBuffer && newSize <= fCpuStagingBuffer->size()) {
+ return;
}
- if (fGpu->caps()->mustClearUploadedBufferData()) {
- fCpuData = sk_calloc_throw(newSize);
- } else {
- fCpuData = sk_malloc_throw(newSize);
- }
- fCpuDataSize = newSize;
- return fCpuData;
+ bool mustInitialize = fGpu->caps()->mustClearUploadedBufferData();
+ fCpuStagingBuffer = fCpuBufferCache ? fCpuBufferCache->makeBuffer(newSize, mustInitialize)
+ : GrCpuBuffer::Make(newSize);
}
-
void GrBufferAllocPool::flushCpuData(const BufferBlock& block, size_t flushSize) {
SkASSERT(block.fBuffer.get());
SkASSERT(!block.fBuffer.get()->isCpuBuffer());
GrGpuBuffer* buffer = static_cast<GrGpuBuffer*>(block.fBuffer.get());
SkASSERT(!buffer->isMapped());
- SkASSERT(fCpuData == fBufferPtr);
+ SkASSERT(fCpuStagingBuffer && fCpuStagingBuffer->data() == fBufferPtr);
SkASSERT(flushSize <= buffer->size());
VALIDATE(true);
@@ -381,15 +424,17 @@
auto resourceProvider = fGpu->getContext()->priv().resourceProvider();
if (fGpu->caps()->preferClientSideDynamicBuffers()) {
- return GrCpuBuffer::Make(size);
+ bool mustInitialize = fGpu->caps()->mustClearUploadedBufferData();
+ return fCpuBufferCache ? fCpuBufferCache->makeBuffer(size, mustInitialize)
+ : GrCpuBuffer::Make(size);
}
return resourceProvider->createBuffer(size, fBufferType, kDynamic_GrAccessPattern);
}
////////////////////////////////////////////////////////////////////////////////
-GrVertexBufferAllocPool::GrVertexBufferAllocPool(GrGpu* gpu, void* initialCpuBuffer)
- : GrBufferAllocPool(gpu, GrGpuBufferType::kVertex, initialCpuBuffer) {}
+GrVertexBufferAllocPool::GrVertexBufferAllocPool(GrGpu* gpu, sk_sp<CpuBufferCache> cpuBufferCache)
+ : GrBufferAllocPool(gpu, GrGpuBufferType::kVertex, std::move(cpuBufferCache)) {}
void* GrVertexBufferAllocPool::makeSpace(size_t vertexSize,
int vertexCount,
@@ -441,8 +486,8 @@
////////////////////////////////////////////////////////////////////////////////
-GrIndexBufferAllocPool::GrIndexBufferAllocPool(GrGpu* gpu, void* initialCpuBuffer)
- : GrBufferAllocPool(gpu, GrGpuBufferType::kIndex, initialCpuBuffer) {}
+GrIndexBufferAllocPool::GrIndexBufferAllocPool(GrGpu* gpu, sk_sp<CpuBufferCache> cpuBufferCache)
+ : GrBufferAllocPool(gpu, GrGpuBufferType::kIndex, std::move(cpuBufferCache)) {}
void* GrIndexBufferAllocPool::makeSpace(int indexCount, sk_sp<const GrBuffer>* buffer,
int* startIndex) {
diff --git a/src/gpu/GrBufferAllocPool.h b/src/gpu/GrBufferAllocPool.h
index d0fda19..b499e80 100644
--- a/src/gpu/GrBufferAllocPool.h
+++ b/src/gpu/GrBufferAllocPool.h
@@ -8,14 +8,14 @@
#ifndef GrBufferAllocPool_DEFINED
#define GrBufferAllocPool_DEFINED
-#include "GrGpuBuffer.h"
+#include "GrCpuBuffer.h"
+#include "GrNonAtomicRef.h"
#include "GrTypesPriv.h"
#include "SkNoncopyable.h"
#include "SkTArray.h"
#include "SkTDArray.h"
#include "SkTypes.h"
-
class GrGpu;
/**
@@ -35,6 +35,28 @@
static constexpr size_t kDefaultBufferSize = 1 << 15;
/**
+ * A cache object that can be shared by multiple GrBufferAllocPool instances. It caches
+ * CPU buffer allocations to avoid reallocating them.
+ */
+ class CpuBufferCache : public GrNonAtomicRef<CpuBufferCache> {
+ public:
+ static sk_sp<CpuBufferCache> Make(int maxBuffersToCache);
+
+ sk_sp<GrCpuBuffer> makeBuffer(size_t size, bool mustBeInitialized);
+ void releaseAll();
+
+ private:
+ CpuBufferCache(int maxBuffersToCache);
+
+ struct Buffer {
+ sk_sp<GrCpuBuffer> fBuffer;
+ bool fCleared = false;
+ };
+ std::unique_ptr<Buffer[]> fBuffers;
+ int fMaxBuffersToCache = 0;
+ };
+
+ /**
* Ensures all buffers are unmapped and have all data written to them.
* Call before drawing using buffers from the pool.
*/
@@ -56,11 +78,11 @@
*
* @param gpu The GrGpu used to create the buffers.
* @param bufferType The type of buffers to create.
- * @param initialBuffer If non-null this should be a kDefaultBufferSize byte allocation.
- * This parameter can be used to avoid malloc/free when all
- * usages can be satisfied with default-sized buffers.
+ * @param cpuBufferCache If non-null a cache for client side array buffers
+ * or staging buffers used before data is uploaded to
+ * GPU buffer objects.
*/
- GrBufferAllocPool(GrGpu* gpu, GrGpuBufferType bufferType, void* initialBuffer);
+ GrBufferAllocPool(GrGpu* gpu, GrGpuBufferType bufferType, sk_sp<CpuBufferCache> cpuBufferCache);
virtual ~GrBufferAllocPool();
@@ -129,18 +151,17 @@
void destroyBlock();
void deleteBlocks();
void flushCpuData(const BufferBlock& block, size_t flushSize);
- void* resetCpuData(size_t newSize);
+ void resetCpuData(size_t newSize);
#ifdef SK_DEBUG
void validate(bool unusedBlockAllowed = false) const;
#endif
size_t fBytesInUse = 0;
SkTArray<BufferBlock> fBlocks;
+ sk_sp<CpuBufferCache> fCpuBufferCache;
+ sk_sp<GrCpuBuffer> fCpuStagingBuffer;
GrGpu* fGpu;
GrGpuBufferType fBufferType;
- void* fInitialCpuData = nullptr;
- void* fCpuData = nullptr;
- size_t fCpuDataSize = 0;
void* fBufferPtr = nullptr;
};
@@ -153,11 +174,11 @@
* Constructor
*
* @param gpu The GrGpu used to create the vertex buffers.
- * @param initialBuffer If non-null this should be a kDefaultBufferSize byte allocation.
- * This parameter can be used to avoid malloc/free when all
- * usages can be satisfied with default-sized buffers.
+ * @param cpuBufferCache If non-null a cache for client side array buffers
+ * or staging buffers used before data is uploaded to
+ * GPU buffer objects.
*/
- GrVertexBufferAllocPool(GrGpu* gpu, void* initialBuffer);
+ GrVertexBufferAllocPool(GrGpu* gpu, sk_sp<CpuBufferCache> cpuBufferCache);
/**
* Returns a block of memory to hold vertices. A buffer designated to hold
@@ -232,11 +253,11 @@
* Constructor
*
* @param gpu The GrGpu used to create the index buffers.
- * @param initialBuffer If non-null this should be a kDefaultBufferSize byte allocation.
- * This parameter can be used to avoid malloc/free when all
- * usages can be satisfied with default-sized buffers.
+ * @param cpuBufferCache If non-null a cache for client side array buffers
+ * or staging buffers used before data is uploaded to
+ * GPU buffer objects.
*/
- GrIndexBufferAllocPool(GrGpu* gpu, void* initialBuffer);
+ GrIndexBufferAllocPool(GrGpu* gpu, sk_sp<CpuBufferCache> cpuBufferCache);
/**
* Returns a block of memory to hold indices. A buffer designated to hold
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index 3b4c73f..29c72d1 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -229,14 +229,16 @@
fActiveOpList = nullptr;
fDAG.prepForFlush();
- SkASSERT(SkToBool(fVertexBufferSpace) == SkToBool(fIndexBufferSpace));
- if (!fVertexBufferSpace) {
- fVertexBufferSpace.reset(new char[GrBufferAllocPool::kDefaultBufferSize]());
- fIndexBufferSpace.reset(new char[GrBufferAllocPool::kDefaultBufferSize]());
+ if (!fCpuBufferCache) {
+ // We cache more buffers when the backend is using client side arrays. Otherwise, we
+ // expect each pool will use a CPU buffer as a staging buffer before uploading to a GPU
+ // buffer object. Each pool only requires one staging buffer at a time.
+ int maxCachedBuffers = fContext->priv().caps()->preferClientSideDynamicBuffers() ? 2 : 6;
+ fCpuBufferCache = GrBufferAllocPool::CpuBufferCache::Make(maxCachedBuffers);
}
GrOpFlushState flushState(gpu, fContext->priv().resourceProvider(), &fTokenTracker,
- fVertexBufferSpace.get(), fIndexBufferSpace.get());
+ fCpuBufferCache);
GrOnFlushResourceProvider onFlushProvider(this);
// TODO: AFAICT the only reason fFlushState is on GrDrawingManager rather than on the
diff --git a/src/gpu/GrDrawingManager.h b/src/gpu/GrDrawingManager.h
index 7b17917..e543ed7 100644
--- a/src/gpu/GrDrawingManager.h
+++ b/src/gpu/GrDrawingManager.h
@@ -8,6 +8,7 @@
#ifndef GrDrawingManager_DEFINED
#define GrDrawingManager_DEFINED
+#include "GrBufferAllocPool.h"
#include "GrDeferredUpload.h"
#include "GrPathRenderer.h"
#include "GrPathRendererChain.h"
@@ -161,9 +162,9 @@
GrContext* fContext;
GrPathRendererChain::Options fOptionsForPathRendererChain;
GrTextContext::Options fOptionsForTextContext;
-
- std::unique_ptr<char[]> fVertexBufferSpace;
- std::unique_ptr<char[]> fIndexBufferSpace;
+ // This cache is used by both the vertex and index pools. It reuses memory across multiple
+ // flushes.
+ sk_sp<GrBufferAllocPool::CpuBufferCache> fCpuBufferCache;
// In debug builds we guard against improper thread handling
GrSingleOwner* fSingleOwner;
diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp
index 37ee176..7b983c4 100644
--- a/src/gpu/GrOpFlushState.cpp
+++ b/src/gpu/GrOpFlushState.cpp
@@ -16,9 +16,10 @@
//////////////////////////////////////////////////////////////////////////////
GrOpFlushState::GrOpFlushState(GrGpu* gpu, GrResourceProvider* resourceProvider,
- GrTokenTracker* tokenTracker, void* vertexSpace, void* indexSpace)
- : fVertexPool(gpu, vertexSpace)
- , fIndexPool(gpu, indexSpace)
+ GrTokenTracker* tokenTracker,
+ sk_sp<GrBufferAllocPool::CpuBufferCache> cpuBufferCache)
+ : fVertexPool(gpu, cpuBufferCache)
+ , fIndexPool(gpu, std::move(cpuBufferCache))
, fGpu(gpu)
, fResourceProvider(resourceProvider)
, fTokenTracker(tokenTracker) {}
diff --git a/src/gpu/GrOpFlushState.h b/src/gpu/GrOpFlushState.h
index 9159d49..54aa4dd 100644
--- a/src/gpu/GrOpFlushState.h
+++ b/src/gpu/GrOpFlushState.h
@@ -29,8 +29,8 @@
-// vertexSpace and indexSpace may either be null or an alloation of size
-// GrBufferAllocPool::kDefaultBufferSize. If the latter, then CPU memory is only allocated for
-// vertices/indices when a buffer larger than kDefaultBufferSize is required.
+// cpuBufferCache may be null. If non-null, it is a cache of CPU allocations used for
+// client side array buffers and for staging buffers that hold vertex/index data
+// before it is uploaded to GPU buffer objects.
- GrOpFlushState(GrGpu*, GrResourceProvider*, GrTokenTracker*, void* vertexSpace,
- void* indexSpace);
+ GrOpFlushState(GrGpu*, GrResourceProvider*, GrTokenTracker*,
+ sk_sp<GrBufferAllocPool::CpuBufferCache> = nullptr);
~GrOpFlushState() final { this->reset(); }