Vulkan: Convert vertex attributes in compute
In this commit, VertexArrayVk::convertVertexBuffer() is renamed to
VertexArrayVk::convertVertexBufferCpu() to explicitly show it does a CPU
readback. A new VertexArrayVk::convertVertexBufferGpu() function is added
that has the same functionality on the GPU (with some assumptions; the
CPU fallback is used should those assumptions fail). Currently, the
only requirement is that buffer offset/stride are divisible by the
component size.
ConvertVertex.comp is the shader responsible for this conversion, and it
implements the functionality in renderer/copyvertex.inc, minus a few
functions that are not used in the Vulkan backend.
Bug: angleproject:2958, angleproject:3009
Change-Id: I8ec9a5f4672509bcf7b9e352cd27663970ad4653
Reviewed-on: https://chromium-review.googlesource.com/c/1364451
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp b/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
index 09a8fd9..5cd018d 100644
--- a/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
+++ b/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
@@ -23,12 +23,11 @@
{
namespace
{
-constexpr size_t kDynamicVertexDataSize = 1024 * 1024;
-constexpr size_t kDynamicIndexDataSize = 1024 * 8;
-constexpr size_t kMaxVertexFormatAlignment = 4;
-constexpr VkBufferUsageFlags kVertexBufferUsageFlags = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+constexpr size_t kDynamicVertexDataSize = 1024 * 1024;
+constexpr size_t kDynamicIndexDataSize = 1024 * 8;
+constexpr size_t kMaxVertexFormatAlignment = 4;
+constexpr VkBufferUsageFlags kVertexBufferUsageFlags =
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr VkBufferUsageFlags kIndexBufferUsageFlags = VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
@@ -60,6 +59,23 @@
return angle::Result::Continue;
}
+size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uint32_t srcFormatSize)
+{
+ // Bytes usable for vertex data.
+ GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
+ if (bytes < srcFormatSize)
+ return 0;
+
+ // Count the last vertex. It may occupy less than a full stride.
+ size_t numVertices = 1;
+ bytes -= srcFormatSize;
+
+ // Count how many strides fit remaining space.
+ if (bytes > 0)
+ numVertices += static_cast<size_t>(bytes) / binding.getStride();
+
+ return numVertices;
+}
} // anonymous namespace
#define INIT \
@@ -174,10 +190,60 @@
// and/or align it as we copy it to a DynamicBuffer. The assumption could be wrong
// but the alternative of copying it piecemeal on each draw would have a lot more
// overhead.
-angle::Result VertexArrayVk::convertVertexBuffer(ContextVk *contextVk,
- BufferVk *srcBuffer,
- const gl::VertexBinding &binding,
- size_t attribIndex)
+angle::Result VertexArrayVk::convertVertexBufferGpu(ContextVk *contextVk,
+ BufferVk *srcBuffer,
+ const gl::VertexBinding &binding,
+ size_t attribIndex)
+{
+ RendererVk *renderer = contextVk->getRenderer();
+
+ const angle::Format &srcFormat = mCurrentArrayBufferFormats[attribIndex]->angleFormat();
+ const angle::Format &destFormat = mCurrentArrayBufferFormats[attribIndex]->bufferFormat();
+
+ ASSERT(binding.getStride() % (srcFormat.pixelBytes / srcFormat.channelCount()) == 0);
+
+ unsigned srcFormatSize = srcFormat.pixelBytes;
+ unsigned destFormatSize = destFormat.pixelBytes;
+
+ size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
+ if (numVertices == 0)
+ {
+ return angle::Result::Continue;
+ }
+
+ ASSERT(GetVertexInputAlignment(*mCurrentArrayBufferFormats[attribIndex]) <=
+ kMaxVertexFormatAlignment);
+
+ // Allocate buffer for results
+ mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(renderer);
+ ANGLE_TRY(mCurrentArrayBufferConversion[attribIndex].allocate(
+ contextVk, numVertices * destFormatSize, nullptr, nullptr,
+ &mCurrentArrayBufferOffsets[attribIndex], nullptr));
+ mCurrentArrayBuffers[attribIndex] =
+ mCurrentArrayBufferConversion[attribIndex].getCurrentBuffer();
+
+ DispatchUtilsVk::ConvertVertexParameters params;
+ params.vertexCount = numVertices;
+ params.srcFormat = &srcFormat;
+ params.destFormat = &destFormat;
+ params.srcStride = binding.getStride();
+ params.srcOffset = binding.getOffset();
+ params.destOffset = static_cast<size_t>(mCurrentArrayBufferOffsets[attribIndex]);
+
+ ANGLE_TRY(renderer->getDispatchUtils().convertVertexBuffer(
+ contextVk, mCurrentArrayBuffers[attribIndex], &srcBuffer->getBuffer(), params));
+
+ mCurrentArrayBufferHandles[attribIndex] =
+ mCurrentArrayBuffers[attribIndex]->getBuffer().getHandle();
+ mCurrentArrayBufferConversionCanRelease[attribIndex] = true;
+
+ return angle::Result::Continue;
+}
+
+angle::Result VertexArrayVk::convertVertexBufferCpu(ContextVk *contextVk,
+ BufferVk *srcBuffer,
+ const gl::VertexBinding &binding,
+ size_t attribIndex)
{
// Needed before reading buffer or we could get stale data.
ANGLE_TRY(contextVk->getRenderer()->finish(contextVk));
@@ -187,18 +253,11 @@
mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(contextVk->getRenderer());
- // Bytes usable for vertex data.
- GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
- if (bytes < srcFormatSize)
+ size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
+ if (numVertices == 0)
+ {
return angle::Result::Continue;
-
- // Count the last vertex. It may occupy less than a full stride.
- size_t numVertices = 1;
- bytes -= srcFormatSize;
-
- // Count how many strides fit remaining space.
- if (bytes > 0)
- numVertices += static_cast<size_t>(bytes) / binding.getStride();
+ }
void *src = nullptr;
ANGLE_TRY(srcBuffer->mapImpl(contextVk, &src));
@@ -322,8 +381,9 @@
// Invalidate the input description for pipelines.
mDirtyPackedInputs.set(attribIndex);
- RendererVk *renderer = contextVk->getRenderer();
- bool releaseConversion = true;
+ RendererVk *renderer = contextVk->getRenderer();
+ bool releaseConversion = true;
+ bool anyVertexBufferConvertedOnGpu = false;
if (attrib.enabled)
{
@@ -335,14 +395,25 @@
BufferVk *bufferVk = vk::GetImpl(bufferGL);
unsigned componentSize =
mCurrentArrayBufferFormats[attribIndex]->angleFormat().pixelBytes / attrib.size;
+ bool bindingIsAligned = BindingIsAligned(binding, componentSize);
if (mCurrentArrayBufferFormats[attribIndex]->vertexLoadRequiresConversion ||
- !BindingIsAligned(binding, componentSize))
+ !bindingIsAligned)
{
mCurrentArrayBufferStrides[attribIndex] =
mCurrentArrayBufferFormats[attribIndex]->bufferFormat().pixelBytes;
- ANGLE_TRY(convertVertexBuffer(contextVk, bufferVk, binding, attribIndex));
+ if (bindingIsAligned)
+ {
+ ANGLE_TRY(convertVertexBufferGpu(contextVk, bufferVk, binding, attribIndex));
+ anyVertexBufferConvertedOnGpu = true;
+ }
+ else
+ {
+ // TODO(syoussefi): Implement unaligned vertex buffer conversions in compute.
+ // http://anglebug.com/3009
+ ANGLE_TRY(convertVertexBufferCpu(contextVk, bufferVk, binding, attribIndex));
+ }
releaseConversion = false;
}
@@ -377,6 +448,11 @@
&renderer->getFormat(angle::FormatID::R32G32B32A32_FLOAT);
}
+ if (anyVertexBufferConvertedOnGpu && renderer->getFeatures().flushAfterVertexConversion)
+ {
+ ANGLE_TRY(renderer->flush(contextVk));
+ }
+
if (releaseConversion)
ensureConversionReleased(renderer, attribIndex);
@@ -603,7 +679,7 @@
// Note: this is a copy, which implicitly converts between formats. Once support for
// primitive restart is added, a specialized shader is likely needed to special case 0xFF ->
// 0xFFFF.
- ANGLE_TRY(renderer->getDispatchUtils()->copyBuffer(contextVk, dest, src, params));
+ ANGLE_TRY(renderer->getDispatchUtils().copyBuffer(contextVk, dest, src, params));
}
else
{