Vulkan: Faster state transitions.
Implements a transition table from each Pipeline Cache entry to
the neighbouring Pipeline Cache entries reachable by a state
change. We use a 64-bit mask to do a quick scan over the pipeline desc.
This ends up being a lot faster than doing a full hash
and memcmp over the pipeline description.
Note that there could be future optimizations to this design.
We might keep a hash map of the pipeline transitions instead
of a list. Or use a sorted list. This could speed up the search
when there are many transitions for cache entries. Also we could
skip the transition table and opt to do a full hash when there
are more than a configurable number of dirty states. This might
be a bit faster in some cases. Likely this will be something we
can add performance tests for in the future.
Documentation is also added in a README file for the Vulkan back
end. This will be extended over time.
Improves performance about 30-35% on the VBO state change test.
Bug: angleproject:3013
Change-Id: I793f9e3efd8887acf00ad60e4ac2502a54c95dee
Reviewed-on: https://chromium-review.googlesource.com/c/1369287
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index bfd3b18..2f3899c 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -109,6 +109,7 @@
ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk *renderer)
: ContextImpl(state, errorSet),
vk::Context(renderer),
+ mCurrentPipeline(nullptr),
mCurrentDrawMode(gl::PrimitiveMode::InvalidEnum),
mVertexArray(nullptr),
mDrawFramebuffer(nullptr),
@@ -253,7 +254,7 @@
{
invalidateCurrentPipeline();
mCurrentDrawMode = mode;
- mGraphicsPipelineDesc->updateTopology(mCurrentDrawMode);
+ mGraphicsPipelineDesc->updateTopology(&mGraphicsPipelineTransition, mCurrentDrawMode);
}
if (!mDrawFramebuffer->appendToStartedRenderPass(mRenderer, commandBufferOut))
@@ -365,13 +366,34 @@
{
if (!mCurrentPipeline)
{
+ const vk::GraphicsPipelineDesc *descPtr;
+
// Draw call shader patching, shader compilation, and pipeline cache query.
ANGLE_TRY(mProgram->getGraphicsPipeline(this, mCurrentDrawMode, *mGraphicsPipelineDesc,
mProgram->getState().getActiveAttribLocationsMask(),
- &mCurrentPipeline));
+ &descPtr, &mCurrentPipeline));
+ mGraphicsPipelineTransition.reset();
+ }
+ else if (mGraphicsPipelineTransition.any())
+ {
+ if (!mCurrentPipeline->findTransition(mGraphicsPipelineTransition, *mGraphicsPipelineDesc,
+ &mCurrentPipeline))
+ {
+ vk::PipelineHelper *oldPipeline = mCurrentPipeline;
+
+ const vk::GraphicsPipelineDesc *descPtr;
+
+ ANGLE_TRY(mProgram->getGraphicsPipeline(
+ this, mCurrentDrawMode, *mGraphicsPipelineDesc,
+ mProgram->getState().getActiveAttribLocationsMask(), &descPtr, &mCurrentPipeline));
+
+ oldPipeline->addTransition(mGraphicsPipelineTransition, descPtr, mCurrentPipeline);
+ }
+
+ mGraphicsPipelineTransition.reset();
}
- commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->get());
+ commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->getPipeline());
// Update the queue serial for the pipeline object.
ASSERT(mCurrentPipeline && mCurrentPipeline->valid());
@@ -623,7 +645,7 @@
blendState.colorMaskBlue, blendState.colorMaskAlpha);
FramebufferVk *framebufferVk = vk::GetImpl(mState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateColorWriteMask(mClearColorMask,
+ mGraphicsPipelineDesc->updateColorWriteMask(&mGraphicsPipelineTransition, mClearColorMask,
framebufferVk->getEmulatedAlphaAttachmentMask());
}
@@ -689,16 +711,20 @@
updateDepthRange(glState.getNearPlane(), glState.getFarPlane());
break;
case gl::State::DIRTY_BIT_BLEND_ENABLED:
- mGraphicsPipelineDesc->updateBlendEnabled(glState.isBlendEnabled());
+ mGraphicsPipelineDesc->updateBlendEnabled(&mGraphicsPipelineTransition,
+ glState.isBlendEnabled());
break;
case gl::State::DIRTY_BIT_BLEND_COLOR:
- mGraphicsPipelineDesc->updateBlendColor(glState.getBlendColor());
+ mGraphicsPipelineDesc->updateBlendColor(&mGraphicsPipelineTransition,
+ glState.getBlendColor());
break;
case gl::State::DIRTY_BIT_BLEND_FUNCS:
- mGraphicsPipelineDesc->updateBlendFuncs(glState.getBlendState());
+ mGraphicsPipelineDesc->updateBlendFuncs(&mGraphicsPipelineTransition,
+ glState.getBlendState());
break;
case gl::State::DIRTY_BIT_BLEND_EQUATIONS:
- mGraphicsPipelineDesc->updateBlendEquations(glState.getBlendState());
+ mGraphicsPipelineDesc->updateBlendEquations(&mGraphicsPipelineTransition,
+ glState.getBlendState());
break;
case gl::State::DIRTY_BIT_COLOR_MASK:
updateColorMask(glState.getBlendState());
@@ -714,61 +740,75 @@
case gl::State::DIRTY_BIT_SAMPLE_MASK:
break;
case gl::State::DIRTY_BIT_DEPTH_TEST_ENABLED:
- mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_DEPTH_FUNC:
- mGraphicsPipelineDesc->updateDepthFunc(glState.getDepthStencilState());
+ mGraphicsPipelineDesc->updateDepthFunc(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_DEPTH_MASK:
- mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_TEST_ENABLED:
- mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_FUNCS_FRONT:
- mGraphicsPipelineDesc->updateStencilFrontFuncs(glState.getStencilRef(),
+ mGraphicsPipelineDesc->updateStencilFrontFuncs(&mGraphicsPipelineTransition,
+ glState.getStencilRef(),
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_FUNCS_BACK:
- mGraphicsPipelineDesc->updateStencilBackFuncs(glState.getStencilBackRef(),
+ mGraphicsPipelineDesc->updateStencilBackFuncs(&mGraphicsPipelineTransition,
+ glState.getStencilBackRef(),
glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_OPS_FRONT:
- mGraphicsPipelineDesc->updateStencilFrontOps(glState.getDepthStencilState());
+ mGraphicsPipelineDesc->updateStencilFrontOps(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_OPS_BACK:
- mGraphicsPipelineDesc->updateStencilBackOps(glState.getDepthStencilState());
+ mGraphicsPipelineDesc->updateStencilBackOps(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState());
break;
case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_FRONT:
- mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_BACK:
- mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
break;
case gl::State::DIRTY_BIT_CULL_FACE_ENABLED:
case gl::State::DIRTY_BIT_CULL_FACE:
- mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState());
+ mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition,
+ glState.getRasterizerState());
break;
case gl::State::DIRTY_BIT_FRONT_FACE:
- mGraphicsPipelineDesc->updateFrontFace(glState.getRasterizerState(),
+ mGraphicsPipelineDesc->updateFrontFace(&mGraphicsPipelineTransition,
+ glState.getRasterizerState(),
isViewportFlipEnabledForDrawFBO());
break;
case gl::State::DIRTY_BIT_POLYGON_OFFSET_FILL_ENABLED:
mGraphicsPipelineDesc->updatePolygonOffsetFillEnabled(
- glState.isPolygonOffsetFillEnabled());
+ &mGraphicsPipelineTransition, glState.isPolygonOffsetFillEnabled());
break;
case gl::State::DIRTY_BIT_POLYGON_OFFSET:
- mGraphicsPipelineDesc->updatePolygonOffset(glState.getRasterizerState());
+ mGraphicsPipelineDesc->updatePolygonOffset(&mGraphicsPipelineTransition,
+ glState.getRasterizerState());
break;
case gl::State::DIRTY_BIT_RASTERIZER_DISCARD_ENABLED:
break;
case gl::State::DIRTY_BIT_LINE_WIDTH:
- mGraphicsPipelineDesc->updateLineWidth(glState.getLineWidth());
+ mGraphicsPipelineDesc->updateLineWidth(&mGraphicsPipelineTransition,
+ glState.getLineWidth());
break;
case gl::State::DIRTY_BIT_PRIMITIVE_RESTART_ENABLED:
break;
@@ -813,19 +853,26 @@
updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(),
glState.getFarPlane(), isViewportFlipEnabledForDrawFBO());
updateColorMask(glState.getBlendState());
- mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState());
+ mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition,
+ glState.getRasterizerState());
updateScissor(glState);
- mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(),
+ mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition,
+ glState.getDepthStencilState(),
glState.getDrawFramebuffer());
- mGraphicsPipelineDesc->updateRenderPassDesc(mDrawFramebuffer->getRenderPassDesc());
+ mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition,
+ mDrawFramebuffer->getRenderPassDesc());
break;
}
case gl::State::DIRTY_BIT_RENDERBUFFER_BINDING:
@@ -851,6 +898,8 @@
bool useVertexBuffer = (mProgram->getState().getMaxActiveAttribLocation());
mNonIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer);
mIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer);
+ mCurrentPipeline = nullptr;
+ mGraphicsPipelineTransition.reset();
break;
}
case gl::State::DIRTY_BIT_TEXTURE_BINDINGS:
@@ -1054,7 +1103,7 @@
{
// Ensure that the RenderPass description is updated.
invalidateCurrentPipeline();
- mGraphicsPipelineDesc->updateRenderPassDesc(renderPassDesc);
+ mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition, renderPassDesc);
}
angle::Result ContextVk::dispatchCompute(const gl::Context *context,
@@ -1279,4 +1328,5 @@
static_cast<uint32_t>(offset));
return angle::Result::Continue;
}
+
} // namespace rx
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h
index 453e5d2..ac350fd 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -179,7 +179,8 @@
GLuint relativeOffset)
{
invalidateVertexAndIndexBuffers();
- mGraphicsPipelineDesc->updateVertexInput(static_cast<uint32_t>(attribIndex), stride,
+ mGraphicsPipelineDesc->updateVertexInput(&mGraphicsPipelineTransition,
+ static_cast<uint32_t>(attribIndex), stride,
divisor, format, relativeOffset);
}
@@ -270,7 +271,6 @@
mDirtyBits.set(DIRTY_BIT_PIPELINE);
mDirtyBits.set(DIRTY_BIT_VIEWPORT);
mDirtyBits.set(DIRTY_BIT_SCISSOR);
- mCurrentPipeline = nullptr;
}
void invalidateCurrentTextures();
@@ -291,12 +291,13 @@
angle::Result handleDirtyViewport(const gl::Context *context, vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyScissor(const gl::Context *context, vk::CommandBuffer *commandBuffer);
- vk::PipelineAndSerial *mCurrentPipeline;
+ vk::PipelineHelper *mCurrentPipeline;
gl::PrimitiveMode mCurrentDrawMode;
// Keep a cached pipeline description structure that can be used to query the pipeline cache.
// Kept in a pointer so allocations can be aligned, and structs can be portably packed.
std::unique_ptr<vk::GraphicsPipelineDesc> mGraphicsPipelineDesc;
+ vk::GraphicsPipelineTransitionBits mGraphicsPipelineTransition;
// The descriptor pools are externally sychronized, so cannot be accessed from different
// threads simultaneously. Hence, we keep them in the ContextVk instead of the RendererVk.
diff --git a/src/libANGLE/renderer/vulkan/ProgramVk.h b/src/libANGLE/renderer/vulkan/ProgramVk.h
index 5b6147f..70b6ad0 100644
--- a/src/libANGLE/renderer/vulkan/ProgramVk.h
+++ b/src/libANGLE/renderer/vulkan/ProgramVk.h
@@ -123,7 +123,8 @@
gl::PrimitiveMode mode,
const vk::GraphicsPipelineDesc &desc,
const gl::AttributesMask &activeAttribLocations,
- vk::PipelineAndSerial **pipelineOut)
+ const vk::GraphicsPipelineDesc **descPtrOut,
+ vk::PipelineHelper **pipelineOut)
{
vk::ShaderProgramHelper *shaderProgram;
ANGLE_TRY(initShaders(contextVk, mode, &shaderProgram));
@@ -132,7 +133,7 @@
return shaderProgram->getGraphicsPipeline(
contextVk, &renderer->getRenderPassCache(), renderer->getPipelineCache(),
renderer->getCurrentQueueSerial(), mPipelineLayout.get(), desc, activeAttribLocations,
- pipelineOut);
+ descPtrOut, pipelineOut);
}
private:
diff --git a/src/libANGLE/renderer/vulkan/README.md b/src/libANGLE/renderer/vulkan/README.md
new file mode 100644
index 0000000..dba3eec
--- /dev/null
+++ b/src/libANGLE/renderer/vulkan/README.md
@@ -0,0 +1,65 @@
+# ANGLE: Vulkan Back-end
+
+ANGLE's Vulkan back-end implementation lives in this folder.
+
+[Vulkan](https://www.khronos.org/vulkan/) is an explicit graphics API. It has a lot in common with
+other explicit APIs such as Microsoft's
+[D3D12](https://docs.microsoft.com/en-us/windows/desktop/direct3d12/directx-12-programming-guide)
+and Apple's [Metal](https://developer.apple.com/metal/). Compared to APIs like OpenGL or D3D11,
+explicit APIs can offer a number of significant benefits:
+
+ * Lower API call CPU overhead.
+ * A smaller API surface with more direct hardware control.
+ * Better support for multi-core programming.
+ * Vulkan in particular has open-source tooling and tests.
+
+## Back-end Design
+
+The [RendererVk](RendererVk.cpp) is a singleton. RendererVk owns shared global resources like the
+[VkDevice](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkDevice.html),
+[VkQueue](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkQueue.html), the
+[Vulkan format tables](vk_format_utils.h) and [internal Vulkan shaders](shaders). The back-end
+creates a new [ContextVk](ContextVk.cpp) instance to manage each allocated OpenGL Context. ContextVk
+processes state changes and handles action commands like `glDrawArrays` and `glDrawElements`.
+
+### Fast OpenGL State Transitions
+
+Typical OpenGL programs issue a few small state change commands between draw call commands. We want
+the typical app's use case to be as fast as possible so this leads to unique performance challenges.
+
+Vulkan is quite different from OpenGL because it requires a separate compiled
+[VkPipeline](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipeline.html)
+for each state vector. Compiling VkPipelines is multiple orders of magnitude slower than enabling or
+disabling an OpenGL render state. To speed this up we use three levels of caching when transitioning
+states in the Vulkan back-end.
+
+The first level is the driver's
+[VkPipelineCache](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipelineCache.html). The driver cache reduces pipeline recompilation time
+significantly. But even cached pipeline recompilations are orders of magnitude slower than OpenGL
+state changes.
+
+The second level cache is an ANGLE-owned hash map from OpenGL state vectors to compiled pipelines.
+See
+[GraphicsPipelineCache](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#498)
+in [vk_cache_utils.h](vk_cache_utils.h). ANGLE's
+[GraphicsPipelineDesc](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#244)
+class is a tightly packed 256-byte description of the current OpenGL rendering state. We
+also use [xxHash](https://github.com/Cyan4973/xxHash) for the fastest possible hash computation.
+The hash map speeds up state changes considerably. But it is still significantly slower than OpenGL
+implementations.
+
+To get the best performance we use a transition table from each OpenGL state vector to neighbouring
+state vectors. The transition table points from GraphicsPipelineCache entries directly to
+neighbouring VkPipeline objects. When the application changes state the state change bits are
+recorded into a compact bit mask that covers the GraphicsPipelineDesc state vector. Then on the next
+draw call we scan the transition bit mask and compare the GraphicsPipelineDesc of the current state
+vector and the state vector of the cached transition. With the hash map we compute a hash over the
+entire state vector and then do a 256-byte `memcmp` to guard against hash collisions. With the
+transition table we will only compare as many bytes as were changed in the transition bit mask. By
+skipping the expensive hashing and `memcmp` we can match or exceed the performance of native
+OpenGL drivers.
+
+Note that the current design of the transition table stores transitions in an unsorted list. If
+applications map from one state to many, this will slow down the transition time. This could be
+improved in the future using a faster look up. For instance we could keep a sorted transition table
+or use a small hash map for transitions.
diff --git a/src/libANGLE/renderer/vulkan/UtilsVk.cpp b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
index a59c2cf..7c2ba26 100644
--- a/src/libANGLE/renderer/vulkan/UtilsVk.cpp
+++ b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
@@ -325,25 +325,30 @@
Serial serial = renderer->getCurrentQueueSerial();
- vk::PipelineAndSerial *pipelineAndSerial;
if (isCompute)
{
+ vk::PipelineAndSerial *pipelineAndSerial;
program->setShader(gl::ShaderType::Compute, fsCsShader);
ANGLE_TRY(program->getComputePipeline(context, pipelineLayout.get(), &pipelineAndSerial));
+ pipelineAndSerial->updateSerial(serial);
+ commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get());
}
else
{
program->setShader(gl::ShaderType::Vertex, vsShader);
program->setShader(gl::ShaderType::Fragment, fsCsShader);
+ // This value is not used but is passed to getGraphicsPipeline to avoid a nullptr check.
+ const vk::GraphicsPipelineDesc *descPtr;
+ vk::PipelineHelper *helper;
+
ANGLE_TRY(program->getGraphicsPipeline(
context, &renderer->getRenderPassCache(), renderer->getPipelineCache(), serial,
- pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &pipelineAndSerial));
+ pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &descPtr, &helper));
+ helper->updateSerial(serial);
+ commandBuffer->bindPipeline(bindPoint, helper->getPipeline());
}
- commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get());
- pipelineAndSerial->updateSerial(serial);
-
if (descriptorSet != VK_NULL_HANDLE)
{
commandBuffer->bindDescriptorSets(bindPoint, pipelineLayout.get(), 0, 1, &descriptorSet, 0,
@@ -619,8 +624,8 @@
vk::GraphicsPipelineDesc pipelineDesc;
pipelineDesc.initDefaults();
- pipelineDesc.updateColorWriteMask(params.colorMaskFlags, *params.alphaMask);
- pipelineDesc.updateRenderPassDesc(*params.renderPassDesc);
+ pipelineDesc.setColorWriteMask(params.colorMaskFlags, *params.alphaMask);
+ pipelineDesc.setRenderPassDesc(*params.renderPassDesc);
vk::ShaderLibrary &shaderLibrary = renderer->getShaderLibrary();
vk::RefCounted<vk::ShaderAndSerial> *vertexShader = nullptr;
@@ -707,7 +712,7 @@
vk::GraphicsPipelineDesc pipelineDesc;
pipelineDesc.initDefaults();
- pipelineDesc.updateRenderPassDesc(renderPassDesc);
+ pipelineDesc.setRenderPassDesc(renderPassDesc);
gl::Rectangle renderArea;
renderArea.x = params.destOffset[0];
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
index ac0d7f7..f33829e 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
@@ -158,10 +158,10 @@
uint8_t stencilReference,
VkStencilOpState *stateOut)
{
- stateOut->failOp = static_cast<VkStencilOp>(packedState.failOp);
- stateOut->passOp = static_cast<VkStencilOp>(packedState.passOp);
- stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.depthFailOp);
- stateOut->compareOp = static_cast<VkCompareOp>(packedState.compareOp);
+ stateOut->failOp = static_cast<VkStencilOp>(packedState.ops.fail);
+ stateOut->passOp = static_cast<VkStencilOp>(packedState.ops.pass);
+ stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.ops.depthFail);
+ stateOut->compareOp = static_cast<VkCompareOp>(packedState.ops.compare);
stateOut->compareMask = packedState.compareMask;
stateOut->writeMask = packedState.writeMask;
stateOut->reference = stencilReference;
@@ -281,6 +281,54 @@
#define SetBitField(lhs, rhs) \
lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \
ASSERT(static_cast<decltype(rhs)>(lhs) == rhs);
+
+// When converting a byte number to a transition bit index we can shift instead of divide.
+constexpr size_t kTransitionByteShift = Log2(kGraphicsPipelineDirtyBitBytes);
+
+// When converting a number of bits offset to a transition bit index we can also shift.
+constexpr size_t kBitsPerByte = 8;
+constexpr size_t kTransitionBitShift = kTransitionByteShift + Log2(kBitsPerByte);
+
+// Helper macro to map from a PipelineDesc struct and field to a dirty bit index.
+// Uses the 'offsetof' macro to compute the offset of 'Member' within the PipelineDesc
+// and the offset of 'Field' within 'Member'. We can optimize the dirty bit setting by computing
+// the shifted dirty bit at compile time instead of calling "set".
+#define ANGLE_GET_TRANSITION_BIT(Member, Field) \
+ ((offsetof(GraphicsPipelineDesc, Member) + offsetof(decltype(Member), Field)) >> \
+ kTransitionByteShift)
+
+// Indexed dirty bits cannot be entirely computed at compile time since the index is passed to
+// the update function.
+#define ANGLE_GET_INDEXED_TRANSITION_BIT(Member, Field, Index, BitWidth) \
+ (((BitWidth * Index) >> kTransitionBitShift) + ANGLE_GET_TRANSITION_BIT(Member, Field))
+
+bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
+ GraphicsPipelineTransitionBits bitsB,
+ const GraphicsPipelineDesc &descA,
+ const GraphicsPipelineDesc &descB)
+{
+ if (bitsA != bitsB)
+ return false;
+
+ // We currently mask over 4 bytes of the pipeline description with each dirty bit.
+ // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
+ // of the code faster. The for loop below would scan over twice as many bits per iteration.
+ // But there may be more collisions between the same dirty bit masks leading to different
+ // transitions. Thus there may be additional cost when applications use many transitions.
+ // We should revisit this in the future and investigate using different bit widths.
+ static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
+
+ const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
+ const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
+
+ for (size_t dirtyBit : bitsA)
+ {
+ if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
+ return false;
+ }
+
+ return true;
+}
} // anonymous namespace
// RenderPassDesc implementation.
@@ -391,53 +439,52 @@
void GraphicsPipelineDesc::initDefaults()
{
- mRasterizationAndMultisampleStateInfo.depthClampEnable = 0;
- mRasterizationAndMultisampleStateInfo.rasterizationDiscardEnable = 0;
- SetBitField(mRasterizationAndMultisampleStateInfo.polygonMode, VK_POLYGON_MODE_FILL);
- SetBitField(mRasterizationAndMultisampleStateInfo.cullMode, VK_CULL_MODE_NONE);
- SetBitField(mRasterizationAndMultisampleStateInfo.frontFace, VK_FRONT_FACE_CLOCKWISE);
- mRasterizationAndMultisampleStateInfo.depthBiasEnable = 0;
+ mRasterizationAndMultisampleStateInfo.bits.depthClampEnable = 0;
+ mRasterizationAndMultisampleStateInfo.bits.rasterizationDiscardEnable = 0;
+ SetBitField(mRasterizationAndMultisampleStateInfo.bits.polygonMode, VK_POLYGON_MODE_FILL);
+ SetBitField(mRasterizationAndMultisampleStateInfo.bits.cullMode, VK_CULL_MODE_NONE);
+ SetBitField(mRasterizationAndMultisampleStateInfo.bits.frontFace, VK_FRONT_FACE_CLOCKWISE);
+ mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = 0;
mRasterizationAndMultisampleStateInfo.depthBiasConstantFactor = 0.0f;
mRasterizationAndMultisampleStateInfo.depthBiasClamp = 0.0f;
mRasterizationAndMultisampleStateInfo.depthBiasSlopeFactor = 0.0f;
mRasterizationAndMultisampleStateInfo.lineWidth = 1.0f;
- mRasterizationAndMultisampleStateInfo.rasterizationSamples = 1;
- mRasterizationAndMultisampleStateInfo.sampleShadingEnable = 0;
- mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f;
+ mRasterizationAndMultisampleStateInfo.bits.rasterizationSamples = 1;
+ mRasterizationAndMultisampleStateInfo.bits.sampleShadingEnable = 0;
+ mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f;
for (uint32_t &sampleMask : mRasterizationAndMultisampleStateInfo.sampleMask)
{
sampleMask = 0;
}
- mRasterizationAndMultisampleStateInfo.alphaToCoverageEnable = 0;
- mRasterizationAndMultisampleStateInfo.alphaToOneEnable = 0;
+ mRasterizationAndMultisampleStateInfo.bits.alphaToCoverageEnable = 0;
+ mRasterizationAndMultisampleStateInfo.bits.alphaToOneEnable = 0;
- mDepthStencilStateInfo.depthTestEnable = 0;
- mDepthStencilStateInfo.depthWriteEnable = 1;
+ mDepthStencilStateInfo.enable.depthTest = 0;
+ mDepthStencilStateInfo.enable.depthWrite = 1;
SetBitField(mDepthStencilStateInfo.depthCompareOp, VK_COMPARE_OP_LESS);
- mDepthStencilStateInfo.depthBoundsTestEnable = 0;
- mDepthStencilStateInfo.stencilTestEnable = 0;
- mDepthStencilStateInfo.minDepthBounds = 0.0f;
- mDepthStencilStateInfo.maxDepthBounds = 0.0f;
- SetBitField(mDepthStencilStateInfo.front.failOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.front.passOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.front.depthFailOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.front.compareOp, VK_COMPARE_OP_ALWAYS);
+ mDepthStencilStateInfo.enable.depthBoundsTest = 0;
+ mDepthStencilStateInfo.enable.stencilTest = 0;
+ mDepthStencilStateInfo.minDepthBounds = 0.0f;
+ mDepthStencilStateInfo.maxDepthBounds = 0.0f;
+ SetBitField(mDepthStencilStateInfo.front.ops.fail, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.front.ops.pass, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.front.ops.depthFail, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.front.ops.compare, VK_COMPARE_OP_ALWAYS);
SetBitField(mDepthStencilStateInfo.front.compareMask, 0xFF);
SetBitField(mDepthStencilStateInfo.front.writeMask, 0xFF);
mDepthStencilStateInfo.frontStencilReference = 0;
- SetBitField(mDepthStencilStateInfo.back.failOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.back.passOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.back.depthFailOp, VK_STENCIL_OP_KEEP);
- SetBitField(mDepthStencilStateInfo.back.compareOp, VK_COMPARE_OP_ALWAYS);
+ SetBitField(mDepthStencilStateInfo.back.ops.fail, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.back.ops.pass, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.back.ops.depthFail, VK_STENCIL_OP_KEEP);
+ SetBitField(mDepthStencilStateInfo.back.ops.compare, VK_COMPARE_OP_ALWAYS);
SetBitField(mDepthStencilStateInfo.back.compareMask, 0xFF);
SetBitField(mDepthStencilStateInfo.back.writeMask, 0xFF);
mDepthStencilStateInfo.backStencilReference = 0;
- PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
- mInputAssembltyAndColorBlendStateInfo;
- inputAndBlend.logicOpEnable = 0;
- inputAndBlend.logicOp = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR);
+ PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo;
+ inputAndBlend.logic.opEnable = 0;
+ inputAndBlend.logic.op = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR);
inputAndBlend.blendEnableMask = 0;
inputAndBlend.blendConstants[0] = 0.0f;
inputAndBlend.blendConstants[1] = 0.0f;
@@ -464,8 +511,8 @@
&inputAndBlend.attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS],
blendAttachmentState);
- inputAndBlend.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
- inputAndBlend.primitiveRestartEnable = 0;
+ inputAndBlend.primitive.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
+ inputAndBlend.primitive.restartEnable = 0;
}
angle::Result GraphicsPipelineDesc::initializePipeline(
@@ -550,9 +597,9 @@
inputAssemblyState.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
inputAssemblyState.flags = 0;
inputAssemblyState.topology =
- static_cast<VkPrimitiveTopology>(mInputAssembltyAndColorBlendStateInfo.topology);
+ static_cast<VkPrimitiveTopology>(mInputAssemblyAndColorBlendStateInfo.primitive.topology);
inputAssemblyState.primitiveRestartEnable =
- static_cast<VkBool32>(mInputAssembltyAndColorBlendStateInfo.primitiveRestartEnable);
+ static_cast<VkBool32>(mInputAssemblyAndColorBlendStateInfo.primitive.restartEnable);
// Set initial viewport and scissor state.
@@ -569,13 +616,13 @@
// Rasterizer state.
rasterState.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterState.flags = 0;
- rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.depthClampEnable);
+ rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.bits.depthClampEnable);
rasterState.rasterizerDiscardEnable =
- static_cast<VkBool32>(rasterAndMS.rasterizationDiscardEnable);
- rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.polygonMode);
- rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.cullMode);
- rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.frontFace);
- rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.depthBiasEnable);
+ static_cast<VkBool32>(rasterAndMS.bits.rasterizationDiscardEnable);
+ rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.bits.polygonMode);
+ rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.bits.cullMode);
+ rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.bits.frontFace);
+ rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.bits.depthBiasEnable);
rasterState.depthBiasConstantFactor = rasterAndMS.depthBiasConstantFactor;
rasterState.depthBiasClamp = rasterAndMS.depthBiasClamp;
rasterState.depthBiasSlopeFactor = rasterAndMS.depthBiasSlopeFactor;
@@ -584,28 +631,30 @@
// Multisample state.
multisampleState.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisampleState.flags = 0;
- multisampleState.rasterizationSamples = gl_vk::GetSamples(rasterAndMS.rasterizationSamples);
- multisampleState.sampleShadingEnable = static_cast<VkBool32>(rasterAndMS.sampleShadingEnable);
- multisampleState.minSampleShading = rasterAndMS.minSampleShading;
+ multisampleState.rasterizationSamples =
+ gl_vk::GetSamples(rasterAndMS.bits.rasterizationSamples);
+ multisampleState.sampleShadingEnable =
+ static_cast<VkBool32>(rasterAndMS.bits.sampleShadingEnable);
+ multisampleState.minSampleShading = rasterAndMS.minSampleShading;
// TODO(jmadill): sample masks
multisampleState.pSampleMask = nullptr;
multisampleState.alphaToCoverageEnable =
- static_cast<VkBool32>(rasterAndMS.alphaToCoverageEnable);
- multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.alphaToOneEnable);
+ static_cast<VkBool32>(rasterAndMS.bits.alphaToCoverageEnable);
+ multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.bits.alphaToOneEnable);
// Depth/stencil state.
depthStencilState.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
depthStencilState.flags = 0;
depthStencilState.depthTestEnable =
- static_cast<VkBool32>(mDepthStencilStateInfo.depthTestEnable);
+ static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthTest);
depthStencilState.depthWriteEnable =
- static_cast<VkBool32>(mDepthStencilStateInfo.depthWriteEnable);
+ static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthWrite);
depthStencilState.depthCompareOp =
static_cast<VkCompareOp>(mDepthStencilStateInfo.depthCompareOp);
depthStencilState.depthBoundsTestEnable =
- static_cast<VkBool32>(mDepthStencilStateInfo.depthBoundsTestEnable);
+ static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthBoundsTest);
depthStencilState.stencilTestEnable =
- static_cast<VkBool32>(mDepthStencilStateInfo.stencilTestEnable);
+ static_cast<VkBool32>(mDepthStencilStateInfo.enable.stencilTest);
UnpackStencilState(mDepthStencilStateInfo.front, mDepthStencilStateInfo.frontStencilReference,
&depthStencilState.front);
UnpackStencilState(mDepthStencilStateInfo.back, mDepthStencilStateInfo.backStencilReference,
@@ -614,12 +663,12 @@
depthStencilState.maxDepthBounds = mDepthStencilStateInfo.maxDepthBounds;
const PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
- mInputAssembltyAndColorBlendStateInfo;
+ mInputAssemblyAndColorBlendStateInfo;
blendState.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
blendState.flags = 0;
- blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logicOpEnable);
- blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logicOp);
+ blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logic.opEnable);
+ blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logic.op);
blendState.attachmentCount = mRenderPassDesc.colorAttachmentCount();
blendState.pAttachments = blendAttachmentState.data();
@@ -672,7 +721,8 @@
return angle::Result::Continue;
}
-void GraphicsPipelineDesc::updateVertexInput(uint32_t attribIndex,
+void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *transition,
+ uint32_t attribIndex,
GLuint stride,
GLuint divisor,
VkFormat format,
@@ -682,90 +732,124 @@
bindingDesc.stride = static_cast<uint16_t>(stride);
bindingDesc.inputRate = static_cast<uint16_t>(divisor > 0 ? VK_VERTEX_INPUT_RATE_INSTANCE
: VK_VERTEX_INPUT_RATE_VERTEX);
+ constexpr size_t kBindingBaseBit =
+ offsetof(GraphicsPipelineDesc, mVertexInputBindings) >> kTransitionByteShift;
+ transition->set(kBindingBaseBit + attribIndex);
+ static_assert(kVertexInputBindingSize == 4, "Size mismatch");
- ASSERT(format <= std::numeric_limits<uint16_t>::max());
if (format == VK_FORMAT_UNDEFINED)
{
UNIMPLEMENTED();
}
- mVertexInputAttribs.formats[attribIndex] = static_cast<uint8_t>(format);
- mVertexInputAttribs.offsets[attribIndex] = static_cast<uint16_t>(relativeOffset);
+ SetBitField(mVertexInputAttribs.formats[attribIndex], format);
+ SetBitField(mVertexInputAttribs.offsets[attribIndex], relativeOffset);
+ transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, formats, attribIndex, 8));
+ transition->set(
+ ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, offsets, attribIndex, 16));
}
-void GraphicsPipelineDesc::updateTopology(gl::PrimitiveMode drawMode)
+void GraphicsPipelineDesc::updateTopology(GraphicsPipelineTransitionBits *transition,
+ gl::PrimitiveMode drawMode)
{
- mInputAssembltyAndColorBlendStateInfo.topology =
- static_cast<uint32_t>(gl_vk::GetPrimitiveTopology(drawMode));
+ VkPrimitiveTopology vkTopology = gl_vk::GetPrimitiveTopology(drawMode);
+ SetBitField(mInputAssemblyAndColorBlendStateInfo.primitive.topology, vkTopology);
+
+ transition->set(ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, primitive));
}
-void GraphicsPipelineDesc::updateCullMode(const gl::RasterizerState &rasterState)
+void GraphicsPipelineDesc::updateCullMode(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState)
{
- mRasterizationAndMultisampleStateInfo.cullMode =
+ mRasterizationAndMultisampleStateInfo.bits.cullMode =
static_cast<uint16_t>(gl_vk::GetCullMode(rasterState));
+ transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
-void GraphicsPipelineDesc::updateFrontFace(const gl::RasterizerState &rasterState,
+void GraphicsPipelineDesc::updateFrontFace(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState,
bool invertFrontFace)
{
- mRasterizationAndMultisampleStateInfo.frontFace =
+ mRasterizationAndMultisampleStateInfo.bits.frontFace =
static_cast<uint16_t>(gl_vk::GetFrontFace(rasterState.frontFace, invertFrontFace));
+ transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
-void GraphicsPipelineDesc::updateLineWidth(float lineWidth)
+void GraphicsPipelineDesc::updateLineWidth(GraphicsPipelineTransitionBits *transition,
+ float lineWidth)
{
mRasterizationAndMultisampleStateInfo.lineWidth = lineWidth;
+ transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, lineWidth));
}
-const RenderPassDesc &GraphicsPipelineDesc::getRenderPassDesc() const
+void GraphicsPipelineDesc::updateBlendColor(GraphicsPipelineTransitionBits *transition,
+ const gl::ColorF &color)
{
- return mRenderPassDesc;
+ mInputAssemblyAndColorBlendStateInfo.blendConstants[0] = color.red;
+ mInputAssemblyAndColorBlendStateInfo.blendConstants[1] = color.green;
+ mInputAssemblyAndColorBlendStateInfo.blendConstants[2] = color.blue;
+ mInputAssemblyAndColorBlendStateInfo.blendConstants[3] = color.alpha;
+ constexpr size_t kSize = sizeof(mInputAssemblyAndColorBlendStateInfo.blendConstants[0]) * 8;
+
+ for (int index = 0; index < 4; ++index)
+ {
+ const size_t kBit = ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
+ blendConstants, index, kSize);
+ transition->set(kBit);
+ }
}
-void GraphicsPipelineDesc::updateBlendColor(const gl::ColorF &color)
-{
- mInputAssembltyAndColorBlendStateInfo.blendConstants[0] = color.red;
- mInputAssembltyAndColorBlendStateInfo.blendConstants[1] = color.green;
- mInputAssembltyAndColorBlendStateInfo.blendConstants[2] = color.blue;
- mInputAssembltyAndColorBlendStateInfo.blendConstants[3] = color.alpha;
-}
-
-void GraphicsPipelineDesc::updateBlendEnabled(bool isBlendEnabled)
+void GraphicsPipelineDesc::updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
+ bool isBlendEnabled)
{
gl::DrawBufferMask blendEnabled;
if (isBlendEnabled)
blendEnabled.set();
- mInputAssembltyAndColorBlendStateInfo.blendEnableMask =
+ mInputAssemblyAndColorBlendStateInfo.blendEnableMask =
static_cast<uint8_t>(blendEnabled.bits());
+ transition->set(
+ ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, blendEnableMask));
}
-void GraphicsPipelineDesc::updateBlendEquations(const gl::BlendState &blendState)
+void GraphicsPipelineDesc::updateBlendEquations(GraphicsPipelineTransitionBits *transition,
+ const gl::BlendState &blendState)
{
- for (PackedColorBlendAttachmentState &blendAttachmentState :
- mInputAssembltyAndColorBlendStateInfo.attachments)
+ constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8;
+
+ for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
+ ++attachmentIndex)
{
+ PackedColorBlendAttachmentState &blendAttachmentState =
+ mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex];
blendAttachmentState.colorBlendOp = PackGLBlendOp(blendState.blendEquationRGB);
blendAttachmentState.alphaBlendOp = PackGLBlendOp(blendState.blendEquationAlpha);
+ transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
+ attachments, attachmentIndex, kSize));
}
}
-void GraphicsPipelineDesc::updateBlendFuncs(const gl::BlendState &blendState)
+void GraphicsPipelineDesc::updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
+ const gl::BlendState &blendState)
{
- for (PackedColorBlendAttachmentState &blendAttachmentState :
- mInputAssembltyAndColorBlendStateInfo.attachments)
+ constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8;
+ for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
+ ++attachmentIndex)
{
+ PackedColorBlendAttachmentState &blendAttachmentState =
+ mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex];
blendAttachmentState.srcColorBlendFactor = PackGLBlendFactor(blendState.sourceBlendRGB);
blendAttachmentState.dstColorBlendFactor = PackGLBlendFactor(blendState.destBlendRGB);
blendAttachmentState.srcAlphaBlendFactor = PackGLBlendFactor(blendState.sourceBlendAlpha);
blendAttachmentState.dstAlphaBlendFactor = PackGLBlendFactor(blendState.destBlendAlpha);
+ transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
+ attachments, attachmentIndex, kSize));
}
}
-void GraphicsPipelineDesc::updateColorWriteMask(VkColorComponentFlags colorComponentFlags,
- const gl::DrawBufferMask &alphaMask)
+void GraphicsPipelineDesc::setColorWriteMask(VkColorComponentFlags colorComponentFlags,
+ const gl::DrawBufferMask &alphaMask)
{
- PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend =
- mInputAssembltyAndColorBlendStateInfo;
+ PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo;
uint8_t colorMask = static_cast<uint8_t>(colorComponentFlags);
for (size_t colorIndex = 0; colorIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; colorIndex++)
@@ -775,105 +859,163 @@
}
}
-void GraphicsPipelineDesc::updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState,
+void GraphicsPipelineDesc::updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
+ VkColorComponentFlags colorComponentFlags,
+ const gl::DrawBufferMask &alphaMask)
+{
+ setColorWriteMask(colorComponentFlags, alphaMask);
+
+ for (size_t colorIndex = 0; colorIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; colorIndex++)
+ {
+ transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo,
+ colorWriteMaskBits, colorIndex, 4));
+ }
+}
+
+void GraphicsPipelineDesc::updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Only enable the depth test if the draw framebuffer has a depth buffer. It's possible that
// we're emulating a stencil-only buffer with a depth-stencil buffer
- mDepthStencilStateInfo.depthTestEnable =
+ mDepthStencilStateInfo.enable.depthTest =
static_cast<uint8_t>(depthStencilState.depthTest && drawFramebuffer->hasDepth());
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
-void GraphicsPipelineDesc::updateDepthFunc(const gl::DepthStencilState &depthStencilState)
+void GraphicsPipelineDesc::updateDepthFunc(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.depthCompareOp = PackGLCompareFunc(depthStencilState.depthFunc);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, depthCompareOp));
}
-void GraphicsPipelineDesc::updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState,
+void GraphicsPipelineDesc::updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Don't write to depth buffers that should not exist
- mDepthStencilStateInfo.depthWriteEnable =
+ mDepthStencilStateInfo.enable.depthWrite =
static_cast<uint8_t>(drawFramebuffer->hasDepth() ? depthStencilState.depthMask : 0);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
-void GraphicsPipelineDesc::updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState,
+void GraphicsPipelineDesc::updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Only enable the stencil test if the draw framebuffer has a stencil buffer. It's possible
// that we're emulating a depth-only buffer with a depth-stencil buffer
- mDepthStencilStateInfo.stencilTestEnable =
+ mDepthStencilStateInfo.enable.stencilTest =
static_cast<uint8_t>(depthStencilState.stencilTest && drawFramebuffer->hasStencil());
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable));
}
-void GraphicsPipelineDesc::updateStencilFrontFuncs(GLint ref,
+void GraphicsPipelineDesc::updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
+ GLint ref,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.frontStencilReference = static_cast<uint8_t>(ref);
- mDepthStencilStateInfo.front.compareOp = PackGLCompareFunc(depthStencilState.stencilFunc);
+ mDepthStencilStateInfo.front.ops.compare = PackGLCompareFunc(depthStencilState.stencilFunc);
mDepthStencilStateInfo.front.compareMask = static_cast<uint8_t>(depthStencilState.stencilMask);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, frontStencilReference));
}
-void GraphicsPipelineDesc::updateStencilBackFuncs(GLint ref,
+void GraphicsPipelineDesc::updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
+ GLint ref,
const gl::DepthStencilState &depthStencilState)
{
mDepthStencilStateInfo.backStencilReference = static_cast<uint8_t>(ref);
- mDepthStencilStateInfo.back.compareOp = PackGLCompareFunc(depthStencilState.stencilBackFunc);
+ mDepthStencilStateInfo.back.ops.compare = PackGLCompareFunc(depthStencilState.stencilBackFunc);
mDepthStencilStateInfo.back.compareMask =
static_cast<uint8_t>(depthStencilState.stencilBackMask);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, backStencilReference));
}
-void GraphicsPipelineDesc::updateStencilFrontOps(const gl::DepthStencilState &depthStencilState)
+void GraphicsPipelineDesc::updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState)
{
- mDepthStencilStateInfo.front.passOp = PackGLStencilOp(depthStencilState.stencilPassDepthPass);
- mDepthStencilStateInfo.front.failOp = PackGLStencilOp(depthStencilState.stencilFail);
- mDepthStencilStateInfo.front.depthFailOp =
+ mDepthStencilStateInfo.front.ops.pass = PackGLStencilOp(depthStencilState.stencilPassDepthPass);
+ mDepthStencilStateInfo.front.ops.fail = PackGLStencilOp(depthStencilState.stencilFail);
+ mDepthStencilStateInfo.front.ops.depthFail =
PackGLStencilOp(depthStencilState.stencilPassDepthFail);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
}
-void GraphicsPipelineDesc::updateStencilBackOps(const gl::DepthStencilState &depthStencilState)
+void GraphicsPipelineDesc::updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState)
{
- mDepthStencilStateInfo.back.passOp =
+ mDepthStencilStateInfo.back.ops.pass =
PackGLStencilOp(depthStencilState.stencilBackPassDepthPass);
- mDepthStencilStateInfo.back.failOp = PackGLStencilOp(depthStencilState.stencilBackFail);
- mDepthStencilStateInfo.back.depthFailOp =
+ mDepthStencilStateInfo.back.ops.fail = PackGLStencilOp(depthStencilState.stencilBackFail);
+ mDepthStencilStateInfo.back.ops.depthFail =
PackGLStencilOp(depthStencilState.stencilBackPassDepthFail);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
}
void GraphicsPipelineDesc::updateStencilFrontWriteMask(
+ GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Don't write to stencil buffers that should not exist
mDepthStencilStateInfo.front.writeMask = static_cast<uint8_t>(
drawFramebuffer->hasStencil() ? depthStencilState.stencilWritemask : 0);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front));
}
void GraphicsPipelineDesc::updateStencilBackWriteMask(
+ GraphicsPipelineTransitionBits *transition,
const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer)
{
// Don't write to stencil buffers that should not exist
mDepthStencilStateInfo.back.writeMask = static_cast<uint8_t>(
drawFramebuffer->hasStencil() ? depthStencilState.stencilBackWritemask : 0);
+ transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back));
}
-void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled(bool enabled)
+void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled(
+ GraphicsPipelineTransitionBits *transition,
+ bool enabled)
{
- mRasterizationAndMultisampleStateInfo.depthBiasEnable = enabled;
+ mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = enabled;
+ transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits));
}
-void GraphicsPipelineDesc::updatePolygonOffset(const gl::RasterizerState &rasterState)
+void GraphicsPipelineDesc::updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState)
{
mRasterizationAndMultisampleStateInfo.depthBiasSlopeFactor = rasterState.polygonOffsetFactor;
mRasterizationAndMultisampleStateInfo.depthBiasConstantFactor = rasterState.polygonOffsetUnits;
+ transition->set(
+ ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasSlopeFactor));
+ transition->set(
+ ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasConstantFactor));
}
-void GraphicsPipelineDesc::updateRenderPassDesc(const RenderPassDesc &renderPassDesc)
+void GraphicsPipelineDesc::setRenderPassDesc(const RenderPassDesc &renderPassDesc)
{
mRenderPassDesc = renderPassDesc;
}
+void GraphicsPipelineDesc::updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
+ const RenderPassDesc &renderPassDesc)
+{
+ setRenderPassDesc(renderPassDesc);
+
+ // The RenderPass is a special case where it spans multiple bits but has no member.
+ constexpr size_t kFirstBit =
+ offsetof(GraphicsPipelineDesc, mRenderPassDesc) >> kTransitionByteShift;
+ constexpr size_t kBitCount = kRenderPassDescSize >> kTransitionByteShift;
+ for (size_t bit = 0; bit < kBitCount; ++bit)
+ {
+ transition->set(kFirstBit + bit);
+ }
+}
+
// AttachmentOpsArray implementation.
AttachmentOpsArray::AttachmentOpsArray()
{
@@ -1026,6 +1168,41 @@
{
return mPushConstantRanges;
}
+
+// PipelineHelper implementation.
+PipelineHelper::PipelineHelper() = default;
+
+PipelineHelper::~PipelineHelper() = default;
+
+void PipelineHelper::destroy(VkDevice device)
+{
+ mPipeline.destroy(device);
+}
+
+bool PipelineHelper::findTransition(GraphicsPipelineTransitionBits bits,
+ const GraphicsPipelineDesc &desc,
+ PipelineHelper **pipelineOut) const
+{
+ // Search could be improved using sorting or hashing.
+ for (const GraphicsPipelineTransition &transition : mTransitions)
+ {
+ if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
+ {
+ *pipelineOut = transition.target;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void PipelineHelper::addTransition(GraphicsPipelineTransitionBits bits,
+ const GraphicsPipelineDesc *desc,
+ PipelineHelper *pipeline)
+{
+ GraphicsPipelineTransition transition = {bits, desc, pipeline};
+ mTransitions.push_back(transition);
+}
} // namespace vk
// RenderPassCache implementation.
@@ -1129,8 +1306,8 @@
{
for (auto &item : mPayload)
{
- vk::PipelineAndSerial &pipeline = item.second;
- pipeline.get().destroy(device);
+ vk::PipelineHelper &pipeline = item.second;
+ pipeline.destroy(device);
}
mPayload.clear();
@@ -1140,8 +1317,8 @@
{
for (auto &item : mPayload)
{
- vk::PipelineAndSerial &pipeline = item.second;
- renderer->releaseObject(pipeline.getSerial(), &pipeline.get());
+ vk::PipelineHelper &pipeline = item.second;
+ renderer->releaseObject(pipeline.getSerial(), &pipeline.getPipeline());
}
mPayload.clear();
@@ -1156,7 +1333,8 @@
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
- vk::PipelineAndSerial **pipelineOut)
+ const vk::GraphicsPipelineDesc **descPtrOut,
+ vk::PipelineHelper **pipelineOut)
{
vk::Pipeline newPipeline;
@@ -1169,9 +1347,9 @@
}
// The Serial will be updated outside of this query.
- auto insertedItem =
- mPayload.emplace(desc, vk::PipelineAndSerial(std::move(newPipeline), Serial()));
- *pipelineOut = &insertedItem.first->second;
+ auto insertedItem = mPayload.emplace(desc, std::move(newPipeline));
+ *descPtrOut = &insertedItem.first->first;
+ *pipelineOut = &insertedItem.first->second;
return angle::Result::Continue;
}
@@ -1184,7 +1362,7 @@
return;
}
- mPayload.emplace(desc, vk::PipelineAndSerial(std::move(pipeline), Serial()));
+ mPayload.emplace(desc, std::move(pipeline));
}
// DescriptorSetLayoutCache implementation.
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.h b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
index 110268f..5c497a0 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
@@ -124,15 +124,6 @@
static_assert(sizeof(AttachmentOpsArray) == 80, "Size check failed");
-struct PackedShaderStageInfo final
-{
- uint32_t stage;
- uint32_t moduleSerial;
- // TODO(jmadill): Do we want specialization constants?
-};
-
-static_assert(sizeof(PackedShaderStageInfo) == 8, "Size check failed");
-
struct PackedVertexInputBindingDesc final
{
// Although techncially stride can be any value in ES 2.0, in practice supporting stride
@@ -142,11 +133,22 @@
uint16_t inputRate;
};
-static_assert(sizeof(PackedVertexInputBindingDesc) == 4, "Size check failed");
+constexpr size_t kVertexInputBindingSize = sizeof(PackedVertexInputBindingDesc);
+static_assert(kVertexInputBindingSize == 4, "Size check failed");
-struct PackedRasterizationAndMultisampleStateInfo final
+using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>;
+constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings);
+
+struct VertexInputAttributes final
{
- // Padded to ensure there's no gaps in this structure or those that use it.
+ uint8_t formats[gl::MAX_VERTEX_ATTRIBS];
+ uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV
+};
+
+constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
+
+struct RasterizationStateBits final
+{
uint32_t depthClampEnable : 4;
uint32_t rasterizationDiscardEnable : 4;
uint32_t polygonMode : 4;
@@ -157,42 +159,66 @@
uint32_t sampleShadingEnable : 1;
uint32_t alphaToCoverageEnable : 1;
uint32_t alphaToOneEnable : 2;
+};
+
+constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
+static_assert(kRasterizationStateBitsSize == 4, "Size check failed");
+
+struct PackedRasterizationAndMultisampleStateInfo final
+{
+ RasterizationStateBits bits;
+ // Padded to ensure there are no gaps in this structure or those that use it.
float minSampleShading;
uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
- float depthBiasConstantFactor;
// Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
float depthBiasClamp;
+ float depthBiasConstantFactor;
float depthBiasSlopeFactor;
float lineWidth;
};
-static constexpr size_t kPackedRasterizationAndMultisampleStateSize =
+constexpr size_t kPackedRasterizationAndMultisampleStateSize =
sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
+struct StencilOps final
+{
+ uint8_t fail : 4;
+ uint8_t pass : 4;
+ uint8_t depthFail : 4;
+ uint8_t compare : 4;
+};
+
+constexpr size_t kStencilOpsSize = sizeof(StencilOps);
+static_assert(kStencilOpsSize == 2, "Size check failed");
+
struct PackedStencilOpState final
{
- uint8_t failOp : 4;
- uint8_t passOp : 4;
- uint8_t depthFailOp : 4;
- uint8_t compareOp : 4;
+ StencilOps ops;
uint8_t compareMask;
uint8_t writeMask;
};
-static constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
-static_assert(sizeof(PackedStencilOpState) == 4, "Size check failed");
+constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
+static_assert(kPackedStencilOpSize == 4, "Size check failed");
+
+struct DepthStencilEnableFlags final
+{
+ uint8_t depthTest : 2; // Each flag only needs one bit; the extra bit is used as padding.
+ uint8_t depthWrite : 2;
+ uint8_t depthBoundsTest : 2;
+ uint8_t stencilTest : 2;
+};
+
+constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
+static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");
struct PackedDepthStencilStateInfo final
{
- uint8_t depthTestEnable : 1;
- uint8_t depthWriteEnable : 1;
- uint8_t depthCompareOp : 4;
- uint8_t depthBoundsTestEnable : 1;
- uint8_t stencilTestEnable : 1;
+ DepthStencilEnableFlags enable;
uint8_t frontStencilReference;
uint8_t backStencilReference;
- uint8_t padding;
+ uint8_t depthCompareOp; // Only needs 4 bits; the extra bits are used as padding.
float minDepthBounds;
float maxDepthBounds;
PackedStencilOpState front;
@@ -202,6 +228,15 @@
constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
+struct LogicOpState final
+{
+ uint8_t opEnable : 1;
+ uint8_t op : 7;
+};
+
+constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
+static_assert(kLogicOpStateSize == 1, "Size check failed");
+
struct PackedColorBlendAttachmentState final
{
uint16_t srcColorBlendFactor : 5;
@@ -212,35 +247,50 @@
uint16_t alphaBlendOp : 6;
};
-static_assert(sizeof(PackedColorBlendAttachmentState) == 4, "Size check failed");
+constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
+static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
+
+struct PrimitiveState final
+{
+ uint16_t topology : 15;
+ uint16_t restartEnable : 1;
+};
+
+constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
+static_assert(kPrimitiveStateSize == 2, "Size check failed");
struct PackedInputAssemblyAndColorBlendStateInfo final
{
- uint8_t logicOpEnable : 1;
- uint8_t logicOp : 7;
- uint8_t blendEnableMask;
uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
- uint16_t topology : 15;
- uint16_t primitiveRestartEnable : 1;
float blendConstants[4];
+ LogicOpState logic;
+ uint8_t blendEnableMask;
+ PrimitiveState primitive;
};
constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
-using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>;
+constexpr size_t kGraphicsPipelineDescSumOfSizes =
+ kVertexInputBindingsSize + kVertexInputAttributesSize +
+ kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize +
+ kPackedDepthStencilStateSize + kRenderPassDescSize;
-struct VertexInputAttributes final
-{
- uint8_t formats[gl::MAX_VERTEX_ATTRIBS];
- uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV
-};
+// Dirty bit count: one bit per kGraphicsPipelineDirtyBitBytes bytes of the pipeline desc.
+constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
+constexpr static size_t kNumGraphicsPipelineDirtyBits =
+ kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
+static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");
-constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings);
-constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
+// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
+using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
+// State changes are applied through the update methods. Each update method can also have a
+// sibling method that applies the update without marking a state transition. The non-transition
+// update methods are used for internal shader pipelines. Not every non-transition update method
+// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
public:
@@ -258,6 +308,13 @@
void initDefaults();
+ // For custom comparisons.
+ template <typename T>
+ const T *getPtr() const
+ {
+ return reinterpret_cast<const T *>(this);
+ }
+
angle::Result initializePipeline(vk::Context *context,
const vk::PipelineCache &pipelineCacheVk,
const RenderPass &compatibleRenderPass,
@@ -267,53 +324,78 @@
const ShaderModule &fragmentModule,
Pipeline *pipelineOut) const;
- // Vertex input state
- void updateVertexInput(uint32_t attribIndex,
+ // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
+ void updateVertexInput(GraphicsPipelineTransitionBits *transition,
+ uint32_t attribIndex,
GLuint stride,
GLuint divisor,
VkFormat format,
GLuint relativeOffset);
// Input assembly info
- void updateTopology(gl::PrimitiveMode drawMode);
+ void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
// Raster states
- void updateCullMode(const gl::RasterizerState &rasterState);
- void updateFrontFace(const gl::RasterizerState &rasterState, bool invertFrontFace);
- void updateLineWidth(float lineWidth);
+ void updateCullMode(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState);
+ void updateFrontFace(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState,
+ bool invertFrontFace);
+ void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
// RenderPass description.
- const RenderPassDesc &getRenderPassDesc() const;
- void updateRenderPassDesc(const RenderPassDesc &renderPassDesc);
+ const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }
+
+ void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
+ void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
+ const RenderPassDesc &renderPassDesc);
// Blend states
- void updateBlendEnabled(bool isBlendEnabled);
- void updateBlendColor(const gl::ColorF &color);
- void updateBlendFuncs(const gl::BlendState &blendState);
- void updateBlendEquations(const gl::BlendState &blendState);
- void updateColorWriteMask(VkColorComponentFlags colorComponentFlags,
+ void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
+ void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
+ void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
+ const gl::BlendState &blendState);
+ void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
+ const gl::BlendState &blendState);
+ void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
+ const gl::DrawBufferMask &alphaMask);
+ void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
+ VkColorComponentFlags colorComponentFlags,
const gl::DrawBufferMask &alphaMask);
// Depth/stencil states.
- void updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState,
+ void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
- void updateDepthFunc(const gl::DepthStencilState &depthStencilState);
- void updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState,
+ void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState);
+ void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
- void updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState,
+ void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
- void updateStencilFrontFuncs(GLint ref, const gl::DepthStencilState &depthStencilState);
- void updateStencilBackFuncs(GLint ref, const gl::DepthStencilState &depthStencilState);
- void updateStencilFrontOps(const gl::DepthStencilState &depthStencilState);
- void updateStencilBackOps(const gl::DepthStencilState &depthStencilState);
- void updateStencilFrontWriteMask(const gl::DepthStencilState &depthStencilState,
+ void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
+ GLint ref,
+ const gl::DepthStencilState &depthStencilState);
+ void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
+ GLint ref,
+ const gl::DepthStencilState &depthStencilState);
+ void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState);
+ void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState);
+ void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
- void updateStencilBackWriteMask(const gl::DepthStencilState &depthStencilState,
+ void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
+ const gl::DepthStencilState &depthStencilState,
const gl::Framebuffer *drawFramebuffer);
// Depth offset.
- void updatePolygonOffsetFillEnabled(bool enabled);
- void updatePolygonOffset(const gl::RasterizerState &rasterState);
+ void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
+ void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
+ const gl::RasterizerState &rasterState);
private:
VertexInputBindings mVertexInputBindings;
@@ -321,7 +403,7 @@
RenderPassDesc mRenderPassDesc;
PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
PackedDepthStencilStateInfo mDepthStencilStateInfo;
- PackedInputAssemblyAndColorBlendStateInfo mInputAssembltyAndColorBlendStateInfo;
+ PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
// Viewport and scissor are applied as dynamic state.
};
@@ -329,12 +411,7 @@
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
-constexpr size_t kGraphicsPipelineDescSumOfSizes =
- kVertexInputBindingsSize + kVertexInputAttributesSize +
- kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize +
- kPackedDepthStencilStateSize + kRenderPassDescSize;
-
-static constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
+constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
constexpr uint32_t kMaxDescriptorSetLayoutBindings = gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES;
@@ -430,6 +507,43 @@
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
+
+class PipelineHelper;
+
+// One entry of a pipeline's transition table: a link from a pipeline cache entry to a
+// neighbouring cache entry reached by a state change.
+struct GraphicsPipelineTransition
+{
+ // Bit mask over the pipeline description used for the quick scan.
+ // NOTE(review): presumably the set of description regions that differ between the source
+ // and target pipelines -- confirm against findTransition's definition.
+ GraphicsPipelineTransitionBits bits;
+ // Raw pointer to a pipeline description.
+ // NOTE(review): presumably the target's description, owned by the pipeline cache --
+ // confirm lifetime against the caller.
+ const GraphicsPipelineDesc *desc;
+ // Raw pointer to the pipeline this transition leads to.
+ PipelineHelper *target;
+};
+
+// Bundles a Pipeline with a Serial and a list of transitions to other PipelineHelpers.
+// Non-copyable.
+class PipelineHelper final : angle::NonCopyable
+{
+ public:
+ PipelineHelper();
+ ~PipelineHelper();
+ // Takes over |pipeline| by moving it into mPipeline; the other members are not named in
+ // the initializer list.
+ explicit PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
+
+ // NOTE(review): defined elsewhere; presumably destroys the wrapped pipeline on |device| --
+ // confirm.
+ void destroy(VkDevice device);
+
+ // Overwrites the stored serial with |serial|.
+ void updateSerial(Serial serial) { mSerial = serial; }
+ // Forwards to the wrapped pipeline's valid().
+ bool valid() const { return mPipeline.valid(); }
+ // Returns the stored serial by value.
+ Serial getSerial() const { return mSerial; }
+ // Returns a mutable reference to the wrapped pipeline.
+ Pipeline &getPipeline() { return mPipeline; }
+
+ // Transition lookup, keyed by a transition bit mask and a pipeline description.
+ // NOTE(review): defined elsewhere; presumably scans mTransitions, writes the matching
+ // target to |pipelineOut| and returns true on a hit -- confirm.
+ bool findTransition(GraphicsPipelineTransitionBits bits,
+ const GraphicsPipelineDesc &desc,
+ PipelineHelper **pipelineOut) const;
+ // NOTE(review): defined elsewhere; presumably records a new transition to |pipeline| in
+ // mTransitions. |desc| is taken by pointer, so it likely must outlive this entry -- confirm.
+ void addTransition(GraphicsPipelineTransitionBits bits,
+ const GraphicsPipelineDesc *desc,
+ PipelineHelper *pipeline);
+
+ private:
+ // Transitions from this pipeline to other pipelines.
+ std::vector<GraphicsPipelineTransition> mTransitions;
+ // Serial read and written through getSerial()/updateSerial().
+ Serial mSerial;
+ // The wrapped pipeline object.
+ Pipeline mPipeline;
+};
+
} // namespace vk
} // namespace rx
@@ -538,18 +652,20 @@
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
- vk::PipelineAndSerial **pipelineOut)
+ const vk::GraphicsPipelineDesc **descPtrOut,
+ vk::PipelineHelper **pipelineOut)
{
auto item = mPayload.find(desc);
if (item != mPayload.end())
{
+ *descPtrOut = &item->first;
*pipelineOut = &item->second;
return angle::Result::Continue;
}
return insertPipeline(context, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
activeAttribLocationsMask, vertexModule, fragmentModule, desc,
- pipelineOut);
+ descPtrOut, pipelineOut);
}
private:
@@ -561,9 +677,10 @@
const vk::ShaderModule &vertexModule,
const vk::ShaderModule &fragmentModule,
const vk::GraphicsPipelineDesc &desc,
- vk::PipelineAndSerial **pipelineOut);
+ const vk::GraphicsPipelineDesc **descPtrOut,
+ vk::PipelineHelper **pipelineOut);
- std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineAndSerial> mPayload;
+ std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};
class DescriptorSetLayoutCache final : angle::NonCopyable
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.h b/src/libANGLE/renderer/vulkan/vk_helpers.h
index 389b386..e6228a1 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.h
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.h
@@ -641,7 +641,8 @@
const PipelineLayout &pipelineLayout,
const GraphicsPipelineDesc &pipelineDesc,
const gl::AttributesMask &activeAttribLocationsMask,
- PipelineAndSerial **pipelineOut)
+ const vk::GraphicsPipelineDesc **descPtrOut,
+ PipelineHelper **pipelineOut)
{
// Pull in a compatible RenderPass.
vk::RenderPass *compatibleRenderPass = nullptr;
@@ -651,7 +652,7 @@
return mGraphicsPipelines.getPipeline(
context, pipelineCache, *compatibleRenderPass, pipelineLayout,
activeAttribLocationsMask, mShaders[gl::ShaderType::Vertex].get().get(),
- mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, pipelineOut);
+ mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, descPtrOut, pipelineOut);
}
angle::Result getComputePipeline(Context *context,
@@ -661,6 +662,8 @@
private:
gl::ShaderMap<BindingPointer<ShaderAndSerial>> mShaders;
GraphicsPipelineCache mGraphicsPipelines;
+
+ // We should probably use PipelineHelper here so we can remove PipelineAndSerial.
PipelineAndSerial mComputePipeline;
};
} // namespace vk