Optimize Fragment Shader Type Match Validation
Improves ValidateFragmentShaderColorBufferTypeMatch by storing input and
output types into a bitmask for quick comparison when validation is
needed. This shows a 2% improvement to glDrawElements for the aquarium
workload.
BUG=angleproject:2203
Change-Id: Iade2ecf28383164e370b48442f01fba6c0962fba
Reviewed-on: https://chromium-review.googlesource.com/775019
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libANGLE/ErrorStrings.h b/src/libANGLE/ErrorStrings.h
index 93d6448..129bd59 100644
--- a/src/libANGLE/ErrorStrings.h
+++ b/src/libANGLE/ErrorStrings.h
@@ -26,6 +26,8 @@
ERRMSG(DefaultFramebufferInvalidAttachment,
"Invalid attachment when the default framebuffer is bound.");
ERRMSG(DefaultFramebufferTarget, "It is invalid to change default FBO's attachments");
+ERRMSG(DrawBufferTypeMismatch,
+ "Fragment shader output type does not match the bound framebuffer attachment type.");
ERRMSG(EnumNotSupported, "Enum is not currently supported.");
ERRMSG(EnumRequiresGLES31, "Enum requires GLES 3.1");
ERRMSG(ES31Required, "OpenGL ES 3.1 Required");
diff --git a/src/libANGLE/Framebuffer.cpp b/src/libANGLE/Framebuffer.cpp
index 48e7168..187733f 100644
--- a/src/libANGLE/Framebuffer.cpp
+++ b/src/libANGLE/Framebuffer.cpp
@@ -19,6 +19,7 @@
#include "libANGLE/Renderbuffer.h"
#include "libANGLE/Surface.h"
#include "libANGLE/Texture.h"
+#include "libANGLE/angletypes.h"
#include "libANGLE/formatutils.h"
#include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/FramebufferImpl.h"
@@ -260,6 +261,7 @@
mColorAttachments(1),
mDrawBufferStates(1, GL_BACK),
mReadBufferState(GL_BACK),
+ mDrawBufferTypeMask(),
mDefaultWidth(0),
mDefaultHeight(0),
mDefaultSamples(0),
@@ -275,6 +277,7 @@
mColorAttachments(caps.maxColorAttachments),
mDrawBufferStates(caps.maxDrawBuffers, GL_NONE),
mReadBufferState(GL_COLOR_ATTACHMENT0_EXT),
+ mDrawBufferTypeMask(),
mDefaultWidth(0),
mDefaultHeight(0),
mDefaultSamples(0),
@@ -621,6 +624,7 @@
FramebufferAttachment::kDefaultMultiviewLayout,
FramebufferAttachment::kDefaultViewportOffsets);
}
+ mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(0), 0);
}
Framebuffer::Framebuffer(rx::GLImplFactory *factory)
@@ -632,6 +636,7 @@
mDirtyStencilAttachmentBinding(this, DIRTY_BIT_STENCIL_ATTACHMENT)
{
mDirtyColorAttachmentBindings.emplace_back(this, DIRTY_BIT_COLOR_ATTACHMENT_0);
+ mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(0), 0);
}
Framebuffer::~Framebuffer()
@@ -823,8 +828,12 @@
mDirtyBits.set(DIRTY_BIT_DRAW_BUFFERS);
mState.mEnabledDrawBuffers.reset();
+ mState.mDrawBufferTypeMask.reset();
+
for (size_t index = 0; index < count; ++index)
{
+ mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(index), index);
+
if (drawStates[index] != GL_NONE && mState.mColorAttachments[index].isAttached())
{
mState.mEnabledDrawBuffers.set(index);
@@ -857,6 +866,16 @@
}
}
+// Returns a copy of the packed per-draw-buffer write types stored in the framebuffer state.
+DrawBufferTypeMask Framebuffer::getDrawBufferTypeMask() const
+{
+    const DrawBufferTypeMask &storedTypes = mState.mDrawBufferTypeMask;
+    return storedTypes;
+}
+
+// Returns a copy of the mask of draw buffers flagged as enabled in the framebuffer state.
+DrawBufferMask Framebuffer::getDrawBufferMask() const
+{
+    const DrawBufferMask &enabledBuffers = mState.mEnabledDrawBuffers;
+    return enabledBuffers;
+}
+
bool Framebuffer::hasEnabledDrawBuffer() const
{
for (size_t drawbufferIdx = 0; drawbufferIdx < mState.mDrawBufferStates.size(); ++drawbufferIdx)
@@ -1689,6 +1708,7 @@
// formsRenderingFeedbackLoopWith
bool enabled = (type != GL_NONE && getDrawBufferState(colorIndex) != GL_NONE);
mState.mEnabledDrawBuffers.set(colorIndex, enabled);
+ mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(colorIndex), colorIndex);
}
break;
}
diff --git a/src/libANGLE/Framebuffer.h b/src/libANGLE/Framebuffer.h
index cd1b28b..25985d1 100644
--- a/src/libANGLE/Framebuffer.h
+++ b/src/libANGLE/Framebuffer.h
@@ -111,6 +111,7 @@
std::vector<GLenum> mDrawBufferStates;
GLenum mReadBufferState;
DrawBufferMask mEnabledDrawBuffers;
+ DrawBufferTypeMask mDrawBufferTypeMask;
GLint mDefaultWidth;
GLint mDefaultHeight;
@@ -196,6 +197,8 @@
void setDrawBuffers(size_t count, const GLenum *buffers);
const FramebufferAttachment *getDrawBuffer(size_t drawBuffer) const;
GLenum getDrawbufferWriteType(size_t drawBuffer) const;
+ DrawBufferTypeMask getDrawBufferTypeMask() const;
+ DrawBufferMask getDrawBufferMask() const;
bool hasEnabledDrawBuffer() const;
GLenum getReadBufferState() const;
diff --git a/src/libANGLE/MemoryProgramCache.cpp b/src/libANGLE/MemoryProgramCache.cpp
index 57172c5..5aa5480 100644
--- a/src/libANGLE/MemoryProgramCache.cpp
+++ b/src/libANGLE/MemoryProgramCache.cpp
@@ -371,6 +371,11 @@
{
state->mOutputVariableTypes.push_back(stream.readInt<GLenum>());
}
+
+ static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 8 * sizeof(uint16_t),
+ "All bits of DrawBufferTypeMask can be contained in an uint16_t");
+ state->mDrawBufferTypeMask.from_ulong(stream.readInt<uint16_t>());
+
static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS < 8 * sizeof(uint32_t),
"All bits of DrawBufferMask can be contained in an uint32_t");
state->mActiveOutputVariables = stream.readInt<uint32_t>();
@@ -541,6 +546,10 @@
stream.writeInt(outputVariableType);
}
+ static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 8 * sizeof(uint16_t),
+ "All bits of DrawBufferTypeMask can be contained in an uint16_t");
+ stream.writeInt(static_cast<uint32_t>(state.mDrawBufferTypeMask.to_ulong()));
+
static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS < 8 * sizeof(uint32_t),
"All bits of DrawBufferMask can be contained in an uint32_t");
stream.writeInt(static_cast<uint32_t>(state.mActiveOutputVariables.to_ulong()));
diff --git a/src/libANGLE/Program.cpp b/src/libANGLE/Program.cpp
index 86a1622..71b8264 100644
--- a/src/libANGLE/Program.cpp
+++ b/src/libANGLE/Program.cpp
@@ -1008,6 +1008,7 @@
mState.mOutputVariables.clear();
mState.mOutputLocations.clear();
mState.mOutputVariableTypes.clear();
+ mState.mDrawBufferTypeMask.reset();
mState.mActiveOutputVariables.reset();
mState.mComputeShaderLocalSize.fill(1);
mState.mSamplerBindings.clear();
@@ -2803,7 +2804,6 @@
return merged;
}
-
void Program::linkOutputVariables(const Context *context)
{
Shader *fragmentShader = mState.mAttachedFragmentShader;
@@ -2811,6 +2811,7 @@
ASSERT(mState.mOutputVariableTypes.empty());
ASSERT(mState.mActiveOutputVariables.none());
+ ASSERT(mState.mDrawBufferTypeMask.none());
// Gather output variable types
for (const auto &outputVariable : fragmentShader->getActiveOutputVariables(context))
@@ -2838,6 +2839,7 @@
ASSERT(location < mState.mActiveOutputVariables.size());
mState.mActiveOutputVariables.set(location);
mState.mOutputVariableTypes[location] = VariableComponentType(outputVariable.type);
+ mState.mDrawBufferTypeMask.setIndex(mState.mOutputVariableTypes[location], location);
}
}
diff --git a/src/libANGLE/Program.h b/src/libANGLE/Program.h
index ef02b10..33e28ef 100644
--- a/src/libANGLE/Program.h
+++ b/src/libANGLE/Program.h
@@ -369,6 +369,7 @@
// Fragment output variable base types: FLOAT, INT, or UINT. Ordered by location.
std::vector<GLenum> mOutputVariableTypes;
+ DrawBufferTypeMask mDrawBufferTypeMask;
bool mBinaryRetrieveableHint;
bool mSeparable;
@@ -627,6 +628,8 @@
int getNumViews() const { return mState.getNumViews(); }
bool usesMultiview() const { return mState.usesMultiview(); }
+ DrawBufferTypeMask getDrawBufferTypeMask() const { return mState.mDrawBufferTypeMask; }
+
private:
~Program() override;
diff --git a/src/libANGLE/angletypes.cpp b/src/libANGLE/angletypes.cpp
index 702d391..467c91c 100644
--- a/src/libANGLE/angletypes.cpp
+++ b/src/libANGLE/angletypes.cpp
@@ -256,4 +256,98 @@
{
return !(lhs == rhs);
}
+
+// Creates a mask in which no draw buffer has a type recorded.
+DrawBufferTypeMask::DrawBufferTypeMask()
+{
+    reset();
+}
+
+// Copy construction and destruction need no custom logic.
+DrawBufferTypeMask::DrawBufferTypeMask(const DrawBufferTypeMask &other) = default;
+
+DrawBufferTypeMask::~DrawBufferTypeMask() = default;
+
+// Clears every type bit, so that all draw buffers read back as GL_NONE.
+void DrawBufferTypeMask::reset()
+{
+    mTypeMask.reset();
+}
+
+// Returns true when no type bit is set for any draw buffer.
+// NOTE(review): this query does not modify the mask and could be const-qualified; doing so needs
+// a matching change to the declaration in angletypes.h.
+bool DrawBufferTypeMask::none()
+{
+    return mTypeMask.none();
+}
+
+// Records |type| as the component type of the draw buffer at |index|.
+//
+// Each draw buffer owns two bits of the mask: a low bit at position |index| and a high bit at
+// position |index| + 8. Written as (high bit, low bit) the encoding is:
+//   GL_NONE = 00, GL_INT = 01, GL_UNSIGNED_INT = 10, GL_FLOAT = 11.
+// Any other |type| hits UNREACHABLE(); if execution continues, both bits are left cleared.
+void DrawBufferTypeMask::setIndex(GLenum type, size_t index)
+{
+    // Valid indices are [0, IMPLEMENTATION_MAX_DRAW_BUFFERS). An index equal to the maximum would
+    // shift the low bit into another buffer's high-bit slot and the high bit out of the mask.
+    ASSERT(index < IMPLEMENTATION_MAX_DRAW_BUFFERS);
+
+    // Clear both bits that belong to this draw buffer before writing the new encoding.
+    mTypeMask &= ~(0x101 << index);
+
+    uint16_t m = 0;
+    switch (type)
+    {
+        case GL_INT:
+            m = 0x001;
+            break;
+        case GL_UNSIGNED_INT:
+            m = 0x100;
+            break;
+        case GL_FLOAT:
+            m = 0x101;
+            break;
+        case GL_NONE:
+            m = 0x000;
+            break;
+        default:
+            UNREACHABLE();
+    }
+
+    mTypeMask |= m << index;
+}
+
+// Exposes the packed type bits as a plain integer (used, for example, when writing the mask to a
+// binary stream).
+unsigned long DrawBufferTypeMask::to_ulong() const
+{
+    const unsigned long packedBits = mTypeMask.to_ulong();
+    return packedBits;
+}
+
+// Replaces the whole mask with the bits in |mask|, typically a value obtained from to_ulong().
+void DrawBufferTypeMask::from_ulong(unsigned long mask)
+{
+    mTypeMask = mask;
+}
+
+// Returns true when every draw buffer that is both written by the program and enabled in the
+// framebuffer has the same component type on both sides.
+//
+// |outputTypes| / |outputMask| describe the program's fragment outputs; |inputTypes| / |inputMask|
+// describe the framebuffer's draw buffers.
+bool DrawBufferTypeMask::ProgramOutputsMatchFramebuffer(DrawBufferTypeMask outputTypes,
+                                                        DrawBufferTypeMask inputTypes,
+                                                        DrawBufferMask outputMask,
+                                                        DrawBufferMask inputMask)
+{
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 16,
+                  "Draw buffer type masks should fit into 16 bits. 2 bits per draw buffer.");
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS == 8,
+                  "Output/Input masks should fit into 8 bits. 1 bit per draw buffer");
+
+    // Type masks keep two bits per draw buffer, split across the two bytes: the low bit of buffer
+    // i sits at bit i and the high bit at bit i + 8. Replicating each one-bit-per-buffer
+    // DrawBufferMask into the upper byte lets it filter both type bits with a single AND.
+    const unsigned long programBits     = outputMask.to_ulong();
+    const unsigned long framebufferBits = inputMask.to_ulong();
+    const unsigned long programMask     = programBits | (programBits << 8);
+    const unsigned long framebufferMask = framebufferBits | (framebufferBits << 8);
+
+    // Keep only the program types for buffers enabled in the framebuffer, and only the
+    // framebuffer types for buffers that are both written by the program and enabled.
+    const unsigned long programTypes = outputTypes.to_ulong() & framebufferMask;
+    const unsigned long framebufferTypes =
+        (inputTypes.to_ulong() & programMask) & framebufferMask;
+
+    // The remaining bits must be identical for the types to match.
+    return programTypes == framebufferTypes;
+}
+
} // namespace gl
diff --git a/src/libANGLE/angletypes.h b/src/libANGLE/angletypes.h
index 907f008..6d84868 100644
--- a/src/libANGLE/angletypes.h
+++ b/src/libANGLE/angletypes.h
@@ -288,8 +288,27 @@
// Used in Program
using UniformBlockBindingMask = angle::BitSet<IMPLEMENTATION_MAX_COMBINED_SHADER_UNIFORM_BUFFERS>;
-// Used in Framebuffer
+// Used in Framebuffer / Program
using DrawBufferMask = angle::BitSet<IMPLEMENTATION_MAX_DRAW_BUFFERS>;
+// Width in bits of a DrawBufferTypeMask: two type bits for each draw buffer.
+constexpr int IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK = 16;
+
+// Packs the component type (GL_NONE, GL_INT, GL_UNSIGNED_INT or GL_FLOAT) of every draw buffer
+// into a single bitset so program outputs and framebuffer attachments can be compared with a
+// few bitwise operations.
+struct DrawBufferTypeMask final
+{
+    DrawBufferTypeMask();
+    DrawBufferTypeMask(const DrawBufferTypeMask &other);
+    ~DrawBufferTypeMask();
+
+    // Mutators.
+    void reset();
+    void setIndex(GLenum type, size_t index);
+    void from_ulong(unsigned long mask);
+
+    // Queries.
+    bool none();
+    unsigned long to_ulong() const;
+
+    // Compares program output types against framebuffer draw buffer types, restricted to the
+    // buffers selected by the two DrawBufferMask arguments.
+    static bool ProgramOutputsMatchFramebuffer(DrawBufferTypeMask outputTypes,
+                                               DrawBufferTypeMask inputTypes,
+                                               DrawBufferMask outputMask,
+                                               DrawBufferMask inputMask);
+
+  private:
+    angle::BitSet<IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK> mTypeMask;
+};
using ContextID = uintptr_t;
diff --git a/src/libANGLE/validationES.cpp b/src/libANGLE/validationES.cpp
index ae564b7..a0617c2 100644
--- a/src/libANGLE/validationES.cpp
+++ b/src/libANGLE/validationES.cpp
@@ -397,18 +397,12 @@
const Program *program = context->getGLState().getProgram();
const Framebuffer *framebuffer = context->getGLState().getDrawFramebuffer();
- const auto &programOutputTypes = program->getOutputVariableTypes();
- for (size_t drawBufferIdx = 0; drawBufferIdx < programOutputTypes.size(); drawBufferIdx++)
+ if (!DrawBufferTypeMask::ProgramOutputsMatchFramebuffer(
+ program->getDrawBufferTypeMask(), framebuffer->getDrawBufferTypeMask(),
+ program->getActiveOutputVariables(), framebuffer->getDrawBufferMask()))
{
- GLenum outputType = programOutputTypes[drawBufferIdx];
- GLenum inputType = framebuffer->getDrawbufferWriteType(drawBufferIdx);
- if (outputType != GL_NONE && inputType != GL_NONE && inputType != outputType)
- {
- context->handleError(InvalidOperation() << "Fragment shader output type does not "
- "match the bound framebuffer attachment "
- "type.");
- return false;
- }
+ ANGLE_VALIDATION_ERR(context, InvalidOperation(), DrawBufferTypeMismatch);
+ return false;
}
return true;