Reland "Metal: Hold refs for input buffers from bindBuffer calls."
This is a reland of a75dc33a677f130df1b24855df2f30fccb2e5137
Original change's description:
> Metal: Hold refs for input buffers from bindBuffer calls.
>
> Mirrors what we have in Vulkan and Direct3D.
> Also adds command buffer tracking, again like Vulkan and Direct3D.
>
> Change-Id: I2280d92274d81830aec7950afc64a0147e38c317
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/305396
> Reviewed-by: Greg Daniel <egdaniel@google.com>
> Commit-Queue: Jim Van Verth <jvanverth@google.com>
Change-Id: I26ebad195fe044c82ea9ad5684ef1ac6d03cbc37
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/306537
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
diff --git a/src/gpu/mtl/GrMtlBuffer.h b/src/gpu/mtl/GrMtlBuffer.h
index 7f11d78..1376823 100644
--- a/src/gpu/mtl/GrMtlBuffer.h
+++ b/src/gpu/mtl/GrMtlBuffer.h
@@ -25,7 +25,6 @@
id<MTLBuffer> mtlBuffer() const { return fMtlBuffer; }
size_t offset() const { return fOffset; }
- void bind(); // for initial binding of XferGpuToCpu buffers
protected:
GrMtlBuffer(GrMtlGpu*, size_t size, GrGpuBufferType intendedType, GrAccessPattern);
diff --git a/src/gpu/mtl/GrMtlBuffer.mm b/src/gpu/mtl/GrMtlBuffer.mm
index 8f0843b..46fea8e 100644
--- a/src/gpu/mtl/GrMtlBuffer.mm
+++ b/src/gpu/mtl/GrMtlBuffer.mm
@@ -35,21 +35,26 @@
: INHERITED(gpu, size, intendedType, accessPattern)
, fIsDynamic(accessPattern != kStatic_GrAccessPattern)
, fOffset(0) {
- // In most cases, we'll allocate dynamic buffers when we map them, below.
- if (!fIsDynamic) {
- NSUInteger options = 0;
- if (@available(macOS 10.11, iOS 9.0, *)) {
+ NSUInteger options = 0;
+ if (@available(macOS 10.11, iOS 9.0, *)) {
+ if (fIsDynamic) {
+#ifdef SK_BUILD_FOR_MAC
+ options |= MTLResourceStorageModeManaged;
+#else
+ options |= MTLResourceStorageModeShared;
+#endif
+ } else {
options |= MTLResourceStorageModePrivate;
}
-#ifdef SK_BUILD_FOR_MAC
- // Mac requires 4-byte alignment for copies so we need
- // to ensure we have space for the extra data
- size = SkAlign4(size);
-#endif
- fMtlBuffer = size == 0 ? nil :
- [gpu->device() newBufferWithLength: size
- options: options];
}
+#ifdef SK_BUILD_FOR_MAC
+ // Mac requires 4-byte alignment for copies so we need
+ // to ensure we have space for the extra data
+ size = SkAlign4(size);
+#endif
+ fMtlBuffer = size == 0 ? nil :
+ [gpu->device() newBufferWithLength: size
+ options: options];
this->registerWithCache(SkBudgeted::kYes);
VALIDATE();
}
@@ -60,11 +65,6 @@
SkASSERT(fMapPtr == nullptr);
}
-void GrMtlBuffer::bind() {
- SkASSERT(fIsDynamic && GrGpuBufferType::kXferGpuToCpu == this->intendedType());
- fMtlBuffer = this->mtlGpu()->resourceProvider().getDynamicBuffer(this->size(), &fOffset);
-}
-
bool GrMtlBuffer::onUpdateData(const void* src, size_t srcInBytes) {
if (!fIsDynamic) {
if (fMtlBuffer == nil) {
@@ -122,9 +122,6 @@
VALIDATE();
SkASSERT(!this->isMapped());
if (fIsDynamic) {
- if (GrGpuBufferType::kXferGpuToCpu != this->intendedType()) {
- fMtlBuffer = this->mtlGpu()->resourceProvider().getDynamicBuffer(sizeInBytes, &fOffset);
- }
fMappedBuffer = fMtlBuffer;
fMapPtr = static_cast<char*>(fMtlBuffer.contents) + fOffset;
} else {
diff --git a/src/gpu/mtl/GrMtlCommandBuffer.h b/src/gpu/mtl/GrMtlCommandBuffer.h
index 959cff5..50373fb 100644
--- a/src/gpu/mtl/GrMtlCommandBuffer.h
+++ b/src/gpu/mtl/GrMtlCommandBuffer.h
@@ -11,15 +11,16 @@
#import <Metal/Metal.h>
#include "include/core/SkRefCnt.h"
+#include "src/gpu/GrBuffer.h"
#include "src/gpu/mtl/GrMtlUtil.h"
class GrMtlGpu;
class GrMtlPipelineState;
class GrMtlOpsRenderPass;
-class GrMtlCommandBuffer {
+class GrMtlCommandBuffer : public SkRefCnt {
public:
- static GrMtlCommandBuffer* Create(id<MTLCommandQueue> queue);
+ static sk_sp<GrMtlCommandBuffer> Make(id<MTLCommandQueue> queue);
~GrMtlCommandBuffer();
void commit(bool waitUntilCompleted);
@@ -33,10 +34,16 @@
[fCmdBuffer addCompletedHandler:block];
}
+ void addGrBuffer(sk_sp<const GrBuffer> buffer) {
+ fTrackedGrBuffers.push_back(std::move(buffer));
+ }
+
void encodeSignalEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
void encodeWaitForEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
private:
+ static const int kInitialTrackedResourcesCount = 32;
+
GrMtlCommandBuffer(id<MTLCommandBuffer> cmdBuffer)
: fCmdBuffer(cmdBuffer)
, fPreviousRenderPassDescriptor(nil) {}
@@ -47,6 +54,8 @@
id<MTLBlitCommandEncoder> fActiveBlitCommandEncoder;
id<MTLRenderCommandEncoder> fActiveRenderCommandEncoder;
MTLRenderPassDescriptor* fPreviousRenderPassDescriptor;
+
+ SkSTArray<kInitialTrackedResourcesCount, sk_sp<const GrBuffer>> fTrackedGrBuffers;
};
#endif
diff --git a/src/gpu/mtl/GrMtlCommandBuffer.mm b/src/gpu/mtl/GrMtlCommandBuffer.mm
index 85b5328..7c04021 100644
--- a/src/gpu/mtl/GrMtlCommandBuffer.mm
+++ b/src/gpu/mtl/GrMtlCommandBuffer.mm
@@ -15,7 +15,7 @@
#error This file must be compiled with Arc. Use -fobjc-arc flag
#endif
-GrMtlCommandBuffer* GrMtlCommandBuffer::Create(id<MTLCommandQueue> queue) {
+sk_sp<GrMtlCommandBuffer> GrMtlCommandBuffer::Make(id<MTLCommandQueue> queue) {
id<MTLCommandBuffer> mtlCommandBuffer;
mtlCommandBuffer = [queue commandBuffer];
if (nil == mtlCommandBuffer) {
@@ -24,11 +24,12 @@
mtlCommandBuffer.label = @"GrMtlCommandBuffer::Create";
- return new GrMtlCommandBuffer(mtlCommandBuffer);
+ return sk_sp<GrMtlCommandBuffer>(new GrMtlCommandBuffer(mtlCommandBuffer));
}
GrMtlCommandBuffer::~GrMtlCommandBuffer() {
this->endAllEncoding();
+ fTrackedGrBuffers.reset();
fCmdBuffer = nil;
}
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 5299ecb..6dbc566 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -8,6 +8,7 @@
#ifndef GrMtlGpu_DEFINED
#define GrMtlGpu_DEFINED
+#include "include/private/SkDeque.h"
#include "src/gpu/GrFinishCallbacks.h"
#include "src/gpu/GrGpu.h"
#include "src/gpu/GrRenderTarget.h"
@@ -15,6 +16,7 @@
#include "src/gpu/GrTexture.h"
#include "src/gpu/mtl/GrMtlCaps.h"
+#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlResourceProvider.h"
#include "src/gpu/mtl/GrMtlStencilAttachment.h"
#include "src/gpu/mtl/GrMtlUtil.h"
@@ -52,11 +54,6 @@
kSkip_SyncQueue
};
- // Commits the current command buffer to the queue and then creates a new command buffer. If
- // sync is set to kForce_SyncQueue, the function will wait for all work in the committed
- // command buffer to finish before returning.
- void submitCommandBuffer(SyncQueue sync);
-
void deleteBackendTexture(const GrBackendTexture&) override;
bool compile(const GrProgramDesc&, const GrProgramInfo&) override;
@@ -214,6 +211,13 @@
bool onSubmitToGpu(bool syncCpu) override;
+ // Commits the current command buffer to the queue and then creates a new command buffer. If
+ // sync is set to kForce_SyncQueue, the function will wait for all work in the committed
+ // command buffer to finish before returning.
+ void submitCommandBuffer(SyncQueue sync);
+
+ void checkForFinishedCommandBuffers();
+
// Function that uploads data onto textures with private storage mode (GPU access only).
bool uploadToTexture(GrMtlTexture* tex, int left, int top, int width, int height,
GrColorType dataColorType, const GrMipLevel texels[], int mipLevels);
@@ -247,7 +251,16 @@
id<MTLDevice> fDevice;
id<MTLCommandQueue> fQueue;
- GrMtlCommandBuffer* fCmdBuffer;
+ sk_sp<GrMtlCommandBuffer> fCurrentCmdBuffer;
+
+ struct OutstandingCommandBuffer {
+ OutstandingCommandBuffer(sk_sp<GrMtlCommandBuffer> commandBuffer, GrFence fence)
+ : fCommandBuffer(std::move(commandBuffer))
+ , fFence(fence) {}
+ sk_sp<GrMtlCommandBuffer> fCommandBuffer;
+ GrFence fFence;
+ };
+ SkDeque fOutstandingCommandBuffers;
std::unique_ptr<SkSL::Compiler> fCompiler;
diff --git a/src/gpu/mtl/GrMtlGpu.mm b/src/gpu/mtl/GrMtlGpu.mm
index 0449781..932c694 100644
--- a/src/gpu/mtl/GrMtlGpu.mm
+++ b/src/gpu/mtl/GrMtlGpu.mm
@@ -112,12 +112,18 @@
return sk_sp<GrGpu>(new GrMtlGpu(direct, options, device, queue, featureSet));
}
+// This constant determines how many OutstandingCommandBuffers are allocated together as a block in
+// the deque. As such it needs to balance allocating too much memory vs. incurring
+// allocation/deallocation thrashing. It should roughly correspond to the max number of outstanding
+// command buffers we expect to see.
+static const int kDefaultOutstandingAllocCnt = 8;
+
GrMtlGpu::GrMtlGpu(GrDirectContext* direct, const GrContextOptions& options,
id<MTLDevice> device, id<MTLCommandQueue> queue, MTLFeatureSet featureSet)
: INHERITED(direct)
, fDevice(device)
, fQueue(queue)
- , fCmdBuffer(nullptr)
+ , fOutstandingCommandBuffers(sizeof(OutstandingCommandBuffer), kDefaultOutstandingAllocCnt)
, fCompiler(new SkSL::Compiler())
, fResourceProvider(this)
, fDisconnected(false)
@@ -135,24 +141,25 @@
void GrMtlGpu::disconnect(DisconnectType type) {
INHERITED::disconnect(type);
- if (DisconnectType::kCleanup == type) {
+ if (!fDisconnected) {
this->destroyResources();
- } else {
- delete fCmdBuffer;
- fCmdBuffer = nullptr;
-
- fResourceProvider.destroyResources();
-
- fQueue = nil;
- fDevice = nil;
-
fDisconnected = true;
}
}
void GrMtlGpu::destroyResources() {
- // Will implicitly delete the command buffer
this->submitCommandBuffer(SyncQueue::kForce_SyncQueue);
+
+ // We used a placement new for each object in fOutstandingCommandBuffers, so we're responsible
+ // for calling the destructor on each of them as well.
+ while (!fOutstandingCommandBuffers.empty()) {
+ OutstandingCommandBuffer* buffer =
+ (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
+ this->deleteFence(buffer->fFence);
+ buffer->~OutstandingCommandBuffer();
+ fOutstandingCommandBuffers.pop_front();
+ }
+
fResourceProvider.destroyResources();
fQueue = nil;
@@ -175,18 +182,44 @@
}
GrMtlCommandBuffer* GrMtlGpu::commandBuffer() {
- if (!fCmdBuffer) {
- fCmdBuffer = GrMtlCommandBuffer::Create(fQueue);
+ if (!fCurrentCmdBuffer) {
+ fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
+
+ // This should be done after we have a new command buffer in case the freeing of any
+ // resources held by a finished command buffer causes us to send a new command to the gpu
+ // (like changing the resource state).
+ this->checkForFinishedCommandBuffers();
}
- return fCmdBuffer;
+ return fCurrentCmdBuffer.get();
}
void GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
- if (fCmdBuffer) {
- fResourceProvider.addBufferCompletionHandler(fCmdBuffer);
- fCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
- delete fCmdBuffer;
- fCmdBuffer = nullptr;
+ // TODO: handle sync with empty command buffer
+ if (fCurrentCmdBuffer) {
+ fResourceProvider.addBufferCompletionHandler(fCurrentCmdBuffer.get());
+
+ GrFence fence = this->insertFence();
+ new (fOutstandingCommandBuffers.push_back()) OutstandingCommandBuffer(
+ fCurrentCmdBuffer, fence);
+
+ fCurrentCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
+ fCurrentCmdBuffer.reset();
+ }
+}
+
+void GrMtlGpu::checkForFinishedCommandBuffers() {
+ // Iterate over all the outstanding command buffers to see if any have finished. The command
+ // buffers are in order from oldest to newest, so we start at the front to check if their fence
+ // has signaled. If so we pop it off and move onto the next.
+ // Repeat till we find a command list that has not finished yet (and all others afterwards are
+ // also guaranteed to not have finished).
+ OutstandingCommandBuffer* front = (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
+ while (front && this->waitFence(front->fFence)) {
+ // Since we used placement new we are responsible for calling the destructor manually.
+ this->deleteFence(front->fFence);
+ front->~OutstandingCommandBuffer();
+ fOutstandingCommandBuffers.pop_front();
+ front = (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
}
}
@@ -1254,7 +1287,6 @@
}
GrMtlBuffer* grMtlBuffer = static_cast<GrMtlBuffer*>(transferBuffer);
- grMtlBuffer->bind();
size_t transBufferRowBytes = bpp * width;
size_t transBufferImageBytes = transBufferRowBytes * height;
diff --git a/src/gpu/mtl/GrMtlOpsRenderPass.mm b/src/gpu/mtl/GrMtlOpsRenderPass.mm
index aa06422..b74717c 100644
--- a/src/gpu/mtl/GrMtlOpsRenderPass.mm
+++ b/src/gpu/mtl/GrMtlOpsRenderPass.mm
@@ -263,6 +263,7 @@
SkASSERT(!vertexBuffer->isCpuBuffer());
SkASSERT(!static_cast<const GrGpuBuffer*>(vertexBuffer.get())->isMapped());
fActiveVertexBuffer = std::move(vertexBuffer);
+ fGpu->commandBuffer()->addGrBuffer(fActiveVertexBuffer);
++inputBufferIndex;
}
if (instanceBuffer) {
@@ -270,11 +271,13 @@
SkASSERT(!static_cast<const GrGpuBuffer*>(instanceBuffer.get())->isMapped());
this->setVertexBuffer(fActiveRenderCmdEncoder, instanceBuffer.get(), 0, inputBufferIndex++);
fActiveInstanceBuffer = std::move(instanceBuffer);
+ fGpu->commandBuffer()->addGrBuffer(fActiveInstanceBuffer);
}
if (indexBuffer) {
SkASSERT(!indexBuffer->isCpuBuffer());
SkASSERT(!static_cast<const GrGpuBuffer*>(indexBuffer.get())->isMapped());
fActiveIndexBuffer = std::move(indexBuffer);
+ fGpu->commandBuffer()->addGrBuffer(fActiveIndexBuffer);
}
}