Optimize Fragment Shader Type Match Validation

Improves ValidateFragmentShaderColorBufferTypeMatch by storing input and
output types into a bitmask for quick comparison when validation is
needed. This shows a 2% improvement to glDrawElements for the aquarium
workload.

BUG=angleproject:2203
Change-Id: Iade2ecf28383164e370b48442f01fba6c0962fba
Reviewed-on: https://chromium-review.googlesource.com/775019
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libANGLE/ErrorStrings.h b/src/libANGLE/ErrorStrings.h
index 93d6448..129bd59 100644
--- a/src/libANGLE/ErrorStrings.h
+++ b/src/libANGLE/ErrorStrings.h
@@ -26,6 +26,8 @@
 ERRMSG(DefaultFramebufferInvalidAttachment,
        "Invalid attachment when the default framebuffer is bound.");
 ERRMSG(DefaultFramebufferTarget, "It is invalid to change default FBO's attachments");
+ERRMSG(DrawBufferTypeMismatch,
+       "Fragment shader output type does not match the bound framebuffer attachment type.");
 ERRMSG(EnumNotSupported, "Enum is not currently supported.");
 ERRMSG(EnumRequiresGLES31, "Enum requires GLES 3.1");
 ERRMSG(ES31Required, "OpenGL ES 3.1 Required");
diff --git a/src/libANGLE/Framebuffer.cpp b/src/libANGLE/Framebuffer.cpp
index 48e7168..187733f 100644
--- a/src/libANGLE/Framebuffer.cpp
+++ b/src/libANGLE/Framebuffer.cpp
@@ -19,6 +19,7 @@
 #include "libANGLE/Renderbuffer.h"
 #include "libANGLE/Surface.h"
 #include "libANGLE/Texture.h"
+#include "libANGLE/angletypes.h"
 #include "libANGLE/formatutils.h"
 #include "libANGLE/renderer/ContextImpl.h"
 #include "libANGLE/renderer/FramebufferImpl.h"
@@ -260,6 +261,7 @@
       mColorAttachments(1),
       mDrawBufferStates(1, GL_BACK),
       mReadBufferState(GL_BACK),
+      mDrawBufferTypeMask(),
       mDefaultWidth(0),
       mDefaultHeight(0),
       mDefaultSamples(0),
@@ -275,6 +277,7 @@
       mColorAttachments(caps.maxColorAttachments),
       mDrawBufferStates(caps.maxDrawBuffers, GL_NONE),
       mReadBufferState(GL_COLOR_ATTACHMENT0_EXT),
+      mDrawBufferTypeMask(),
       mDefaultWidth(0),
       mDefaultHeight(0),
       mDefaultSamples(0),
@@ -621,6 +624,7 @@
                           FramebufferAttachment::kDefaultMultiviewLayout,
                           FramebufferAttachment::kDefaultViewportOffsets);
     }
+    mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(0), 0);
 }
 
 Framebuffer::Framebuffer(rx::GLImplFactory *factory)
@@ -632,6 +636,7 @@
       mDirtyStencilAttachmentBinding(this, DIRTY_BIT_STENCIL_ATTACHMENT)
 {
     mDirtyColorAttachmentBindings.emplace_back(this, DIRTY_BIT_COLOR_ATTACHMENT_0);
+    mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(0), 0);
 }
 
 Framebuffer::~Framebuffer()
@@ -823,8 +828,12 @@
     mDirtyBits.set(DIRTY_BIT_DRAW_BUFFERS);
 
     mState.mEnabledDrawBuffers.reset();
+    mState.mDrawBufferTypeMask.reset();
+
     for (size_t index = 0; index < count; ++index)
     {
+        mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(index), index);
+
         if (drawStates[index] != GL_NONE && mState.mColorAttachments[index].isAttached())
         {
             mState.mEnabledDrawBuffers.set(index);
@@ -857,6 +866,16 @@
     }
 }
 
+DrawBufferTypeMask Framebuffer::getDrawBufferTypeMask() const
+{
+    return mState.mDrawBufferTypeMask;  // Packed write types of the draw buffers (by value).
+}
+
+DrawBufferMask Framebuffer::getDrawBufferMask() const
+{
+    return mState.mEnabledDrawBuffers;  // One bit per currently enabled draw buffer.
+}
+
 bool Framebuffer::hasEnabledDrawBuffer() const
 {
     for (size_t drawbufferIdx = 0; drawbufferIdx < mState.mDrawBufferStates.size(); ++drawbufferIdx)
@@ -1689,6 +1708,7 @@
             // formsRenderingFeedbackLoopWith
             bool enabled = (type != GL_NONE && getDrawBufferState(colorIndex) != GL_NONE);
             mState.mEnabledDrawBuffers.set(colorIndex, enabled);
+            mState.mDrawBufferTypeMask.setIndex(getDrawbufferWriteType(colorIndex), colorIndex);
         }
         break;
     }
diff --git a/src/libANGLE/Framebuffer.h b/src/libANGLE/Framebuffer.h
index cd1b28b..25985d1 100644
--- a/src/libANGLE/Framebuffer.h
+++ b/src/libANGLE/Framebuffer.h
@@ -111,6 +111,7 @@
     std::vector<GLenum> mDrawBufferStates;
     GLenum mReadBufferState;
     DrawBufferMask mEnabledDrawBuffers;
+    DrawBufferTypeMask mDrawBufferTypeMask;
 
     GLint mDefaultWidth;
     GLint mDefaultHeight;
@@ -196,6 +197,8 @@
     void setDrawBuffers(size_t count, const GLenum *buffers);
     const FramebufferAttachment *getDrawBuffer(size_t drawBuffer) const;
     GLenum getDrawbufferWriteType(size_t drawBuffer) const;
+    DrawBufferTypeMask getDrawBufferTypeMask() const;
+    DrawBufferMask getDrawBufferMask() const;
     bool hasEnabledDrawBuffer() const;
 
     GLenum getReadBufferState() const;
diff --git a/src/libANGLE/MemoryProgramCache.cpp b/src/libANGLE/MemoryProgramCache.cpp
index 57172c5..5aa5480 100644
--- a/src/libANGLE/MemoryProgramCache.cpp
+++ b/src/libANGLE/MemoryProgramCache.cpp
@@ -371,6 +371,11 @@
     {
         state->mOutputVariableTypes.push_back(stream.readInt<GLenum>());
     }
+
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 8 * sizeof(uint16_t),
+                  "All bits of DrawBufferTypeMask can be contained in an uint16_t");
+    state->mDrawBufferTypeMask.from_ulong(stream.readInt<uint16_t>());
+
     static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS < 8 * sizeof(uint32_t),
                   "All bits of DrawBufferMask can be contained in an uint32_t");
     state->mActiveOutputVariables = stream.readInt<uint32_t>();
@@ -541,6 +546,10 @@
         stream.writeInt(outputVariableType);
     }
 
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 8 * sizeof(uint16_t),
+                  "All bits of DrawBufferTypeMask can be contained in an uint16_t");
+    stream.writeInt(static_cast<uint32_t>(state.mDrawBufferTypeMask.to_ulong()));
+
     static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS < 8 * sizeof(uint32_t),
                   "All bits of DrawBufferMask can be contained in an uint32_t");
     stream.writeInt(static_cast<uint32_t>(state.mActiveOutputVariables.to_ulong()));
diff --git a/src/libANGLE/Program.cpp b/src/libANGLE/Program.cpp
index 86a1622..71b8264 100644
--- a/src/libANGLE/Program.cpp
+++ b/src/libANGLE/Program.cpp
@@ -1008,6 +1008,7 @@
     mState.mOutputVariables.clear();
     mState.mOutputLocations.clear();
     mState.mOutputVariableTypes.clear();
+    mState.mDrawBufferTypeMask.reset();
     mState.mActiveOutputVariables.reset();
     mState.mComputeShaderLocalSize.fill(1);
     mState.mSamplerBindings.clear();
@@ -2803,7 +2804,6 @@
     return merged;
 }
 
-
 void Program::linkOutputVariables(const Context *context)
 {
     Shader *fragmentShader = mState.mAttachedFragmentShader;
@@ -2811,6 +2811,7 @@
 
     ASSERT(mState.mOutputVariableTypes.empty());
     ASSERT(mState.mActiveOutputVariables.none());
+    ASSERT(mState.mDrawBufferTypeMask.none());
 
     // Gather output variable types
     for (const auto &outputVariable : fragmentShader->getActiveOutputVariables(context))
@@ -2838,6 +2839,7 @@
             ASSERT(location < mState.mActiveOutputVariables.size());
             mState.mActiveOutputVariables.set(location);
             mState.mOutputVariableTypes[location] = VariableComponentType(outputVariable.type);
+            mState.mDrawBufferTypeMask.setIndex(mState.mOutputVariableTypes[location], location);
         }
     }
 
diff --git a/src/libANGLE/Program.h b/src/libANGLE/Program.h
index ef02b10..33e28ef 100644
--- a/src/libANGLE/Program.h
+++ b/src/libANGLE/Program.h
@@ -369,6 +369,7 @@
 
     // Fragment output variable base types: FLOAT, INT, or UINT.  Ordered by location.
     std::vector<GLenum> mOutputVariableTypes;
+    DrawBufferTypeMask mDrawBufferTypeMask;
 
     bool mBinaryRetrieveableHint;
     bool mSeparable;
@@ -627,6 +628,8 @@
     int getNumViews() const { return mState.getNumViews(); }
     bool usesMultiview() const { return mState.usesMultiview(); }
 
+    DrawBufferTypeMask getDrawBufferTypeMask() const { return mState.mDrawBufferTypeMask; }
+
   private:
     ~Program() override;
 
diff --git a/src/libANGLE/angletypes.cpp b/src/libANGLE/angletypes.cpp
index 702d391..467c91c 100644
--- a/src/libANGLE/angletypes.cpp
+++ b/src/libANGLE/angletypes.cpp
@@ -256,4 +256,98 @@
 {
     return !(lhs == rhs);
 }
+
+DrawBufferTypeMask::DrawBufferTypeMask()
+{
+    mTypeMask.reset();  // Start empty; setIndex() encodes GL_NONE as "no bits set".
+}
+
+DrawBufferTypeMask::DrawBufferTypeMask(const DrawBufferTypeMask &other) = default;
+
+DrawBufferTypeMask::~DrawBufferTypeMask() = default;
+
+void DrawBufferTypeMask::reset()
+{
+    mTypeMask.reset();  // Clears the type bits of every draw buffer at once.
+}
+
+// Reports whether the mask is completely empty, i.e. neither of the two type bits is set for
+// any draw buffer. setIndex() stores GL_NONE as "no bits", so an empty mask holds only GL_NONE.
+bool DrawBufferTypeMask::none()
+{
+    const bool maskIsEmpty = mTypeMask.none();
+
+    // Forward the underlying bitset's answer unchanged.
+    return maskIsEmpty;
+}
+
+void DrawBufferTypeMask::setIndex(GLenum type, size_t index)
+{
+    // Stores |type| for draw buffer |index| in two bits: bit |index| and bit |index| + 8.
+    // With 8 draw buffers, |index| == 8 would touch bit 8, the high bit of draw buffer 0.
+    ASSERT(index < IMPLEMENTATION_MAX_DRAW_BUFFERS);
+    mTypeMask &= ~(0x101 << index);  // Forget the type previously stored for this index.
+    uint16_t m = 0;
+    switch (type)
+    {
+        case GL_INT:
+            m = 0x001;  // low bit only
+            break;
+        case GL_UNSIGNED_INT:
+            m = 0x100;  // high bit only
+            break;
+        case GL_FLOAT:
+            m = 0x101;  // both bits
+            break;
+        case GL_NONE:
+            m = 0x000;  // no bits
+            break;
+        default:
+            UNREACHABLE();  // Any other type is treated as a programming error.
+    }
+
+    mTypeMask |= m << index;
+}
+
+unsigned long DrawBufferTypeMask::to_ulong() const
+{
+    return mTypeMask.to_ulong();  // Raw bits of the underlying bitset, unmodified.
+}
+
+void DrawBufferTypeMask::from_ulong(unsigned long mask)
+{
+    mTypeMask = mask;  // Overwrites all stored type bits with the given raw bits.
+}
+
+bool DrawBufferTypeMask::ProgramOutputsMatchFramebuffer(DrawBufferTypeMask outputTypes,
+                                                        DrawBufferTypeMask inputTypes,
+                                                        DrawBufferMask outputMask,
+                                                        DrawBufferMask inputMask)
+{
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK == 16,
+                  "Draw buffer type masks should fit into 16 bits. 2 bits per draw buffer.");
+    static_assert(IMPLEMENTATION_MAX_DRAW_BUFFERS == 8,
+                  "Output/Input masks should fit into 8 bits. 1 bit per draw buffer");
+
+    // Bit-mask form of the per-draw-buffer type comparison, used for performance reasons.
+    // The type of draw buffer |i| occupies bit |i| (low half) and bit |i| + 8 (high half), so
+    // an 8-bit DrawBufferMask copied into both halves selects both type bits of every draw
+    // buffer that is set in that mask.
+    const unsigned long programActive      = outputMask.to_ulong();
+    const unsigned long framebufferEnabled = inputMask.to_ulong();
+    const unsigned long programSelect      = programActive | (programActive << 8);
+    const unsigned long framebufferSelect  = framebufferEnabled | (framebufferEnabled << 8);
+
+    // Framebuffer (input) types, kept only for draw buffers selected by outputMask.
+    const unsigned long expectedTypeBits = inputTypes.to_ulong() & programSelect;
+
+    // XOR leaves a bit set wherever the program (output) types and the expected types differ.
+    // Differences on draw buffers that are not selected by inputMask are ignored.
+    const unsigned long mismatchBits =
+        (outputTypes.to_ulong() ^ expectedTypeBits) & framebufferSelect;
+
+    // The types match when no relevant bit differs.
+    return mismatchBits == 0;
+}
+
 }  // namespace gl
diff --git a/src/libANGLE/angletypes.h b/src/libANGLE/angletypes.h
index 907f008..6d84868 100644
--- a/src/libANGLE/angletypes.h
+++ b/src/libANGLE/angletypes.h
@@ -288,8 +288,27 @@
 // Used in Program
 using UniformBlockBindingMask = angle::BitSet<IMPLEMENTATION_MAX_COMBINED_SHADER_UNIFORM_BUFFERS>;
 
-// Used in Framebuffer
+// Used in Framebuffer / Program
 using DrawBufferMask = angle::BitSet<IMPLEMENTATION_MAX_DRAW_BUFFERS>;
+constexpr int IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK = 16;
+struct DrawBufferTypeMask final  // Two type bits per draw buffer, packed into one bitset.
+{
+    DrawBufferTypeMask();
+    DrawBufferTypeMask(const DrawBufferTypeMask &other);
+    ~DrawBufferTypeMask();
+    void reset();                              // Clears every bit in the mask.
+    bool none();                               // True when no bit in the mask is set.
+    void setIndex(GLenum type, size_t index);  // Stores |type| for draw buffer |index|.
+    unsigned long to_ulong() const;            // Raw mask bits as an integer.
+    void from_ulong(unsigned long mask);       // Overwrites the mask with raw bits.
+    static bool ProgramOutputsMatchFramebuffer(DrawBufferTypeMask outputTypes,  // program
+                                               DrawBufferTypeMask inputTypes,   // framebuffer
+                                               DrawBufferMask outputMask,       // program
+                                               DrawBufferMask inputMask);       // framebuffer
+
+  private:
+    angle::BitSet<IMPLEMENTATION_MAX_DRAW_BUFFER_TYPE_MASK> mTypeMask;  // bits i and i + 8
+};
 
 using ContextID = uintptr_t;
 
diff --git a/src/libANGLE/validationES.cpp b/src/libANGLE/validationES.cpp
index ae564b7..a0617c2 100644
--- a/src/libANGLE/validationES.cpp
+++ b/src/libANGLE/validationES.cpp
@@ -397,18 +397,12 @@
     const Program *program         = context->getGLState().getProgram();
     const Framebuffer *framebuffer = context->getGLState().getDrawFramebuffer();
 
-    const auto &programOutputTypes = program->getOutputVariableTypes();
-    for (size_t drawBufferIdx = 0; drawBufferIdx < programOutputTypes.size(); drawBufferIdx++)
+    if (!DrawBufferTypeMask::ProgramOutputsMatchFramebuffer(
+            program->getDrawBufferTypeMask(), framebuffer->getDrawBufferTypeMask(),
+            program->getActiveOutputVariables(), framebuffer->getDrawBufferMask()))
     {
-        GLenum outputType = programOutputTypes[drawBufferIdx];
-        GLenum inputType  = framebuffer->getDrawbufferWriteType(drawBufferIdx);
-        if (outputType != GL_NONE && inputType != GL_NONE && inputType != outputType)
-        {
-            context->handleError(InvalidOperation() << "Fragment shader output type does not "
-                                                       "match the bound framebuffer attachment "
-                                                       "type.");
-            return false;
-        }
+        ANGLE_VALIDATION_ERR(context, InvalidOperation(), DrawBufferTypeMismatch);
+        return false;
     }
 
     return true;