Optimize glDrawElements performance
A call to glDrawElements results in a call depth of up to 4:
* glDrawElements
* gl::Context::drawElements
* rx::ContextGL::drawElements
* VertexArrayGL::syncDrawState
Each function call has to save and restore many registers, which
results in a stall in the prologue of rx::ContextGL::drawElements
due to memory bandwidth limitations.
The main change is inlining gl::Context::drawElements, which
reduces the call depth by one. In addition, the call to
ContextGL::syncDrawElementsState is now guarded by a check so that
it is made only when required. Finally, a few small getter
functions have been inlined where the code needed to call them
was larger than the function body itself.
In total this change improves performance of the
DrawElementsPerfBenchmark.Run/gl benchmark by 16%.
Bug: angleproject:2966
Change-Id: I423d18452f2f5b520ab52850fda2054e1da86991
Reviewed-on: https://chromium-review.googlesource.com/c/1389988
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Commit-Queue: Markus Tavenrath <matavenrath@nvidia.com>
diff --git a/src/libANGLE/Context.cpp b/src/libANGLE/Context.cpp
index 5a62efe..b9b1b4e 100644
--- a/src/libANGLE/Context.cpp
+++ b/src/libANGLE/Context.cpp
@@ -8,6 +8,7 @@
// rendering operations. It is the GLES2 specific implementation of EGLContext.
#include "libANGLE/Context.h"
+#include "libANGLE/Context.inl.h"
#include <string.h>
#include <iterator>
@@ -25,7 +26,6 @@
#include "libANGLE/Fence.h"
#include "libANGLE/Framebuffer.h"
#include "libANGLE/FramebufferAttachment.h"
-#include "libANGLE/GLES1Renderer.h"
#include "libANGLE/Path.h"
#include "libANGLE/Program.h"
#include "libANGLE/ProgramPipeline.h"
@@ -42,7 +42,6 @@
#include "libANGLE/queryconversions.h"
#include "libANGLE/queryutils.h"
#include "libANGLE/renderer/BufferImpl.h"
-#include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/EGLImplFactory.h"
#include "libANGLE/renderer/Format.h"
#include "libANGLE/validationES.h"
@@ -2241,21 +2240,6 @@
MarkTransformFeedbackBufferUsage(this, count, instanceCount);
}
-void Context::drawElements(PrimitiveMode mode,
- GLsizei count,
- DrawElementsType type,
- const void *indices)
-{
- // No-op if count draws no primitives for given mode
- if (noopDraw(mode, count))
- {
- return;
- }
-
- ANGLE_CONTEXT_TRY(prepareForDraw(mode));
- ANGLE_CONTEXT_TRY(mImplementation->drawElements(this, mode, count, type, indices));
-}
-
void Context::drawElementsInstanced(PrimitiveMode mode,
GLsizei count,
DrawElementsType type,
@@ -3516,40 +3500,6 @@
return (instanceCount == 0) || noopDraw(mode, count);
}
-ANGLE_INLINE angle::Result Context::syncDirtyBits()
-{
- const State::DirtyBits &dirtyBits = mGLState.getDirtyBits();
- ANGLE_TRY(mImplementation->syncState(this, dirtyBits, mAllDirtyBits));
- mGLState.clearDirtyBits();
- return angle::Result::Continue;
-}
-
-ANGLE_INLINE angle::Result Context::syncDirtyBits(const State::DirtyBits &bitMask)
-{
- const State::DirtyBits &dirtyBits = (mGLState.getDirtyBits() & bitMask);
- ANGLE_TRY(mImplementation->syncState(this, dirtyBits, bitMask));
- mGLState.clearDirtyBits(dirtyBits);
- return angle::Result::Continue;
-}
-
-ANGLE_INLINE angle::Result Context::syncDirtyObjects(const State::DirtyObjects &objectMask)
-{
- return mGLState.syncDirtyObjects(this, objectMask);
-}
-
-ANGLE_INLINE angle::Result Context::prepareForDraw(PrimitiveMode mode)
-{
- if (mGLES1Renderer)
- {
- ANGLE_TRY(mGLES1Renderer->prepareForDraw(mode, this, &mGLState));
- }
-
- ANGLE_TRY(syncDirtyObjects(mDrawDirtyObjects));
- ASSERT(!isRobustResourceInitEnabled() ||
- !mGLState.getDrawFramebuffer()->hasResourceThatNeedsInit());
- return syncDirtyBits();
-}
-
angle::Result Context::prepareForClear(GLbitfield mask)
{
ANGLE_TRY(syncDirtyObjects(mClearDirtyObjects));
@@ -8495,7 +8445,7 @@
void StateCache::updateTransformFeedbackActiveUnpaused(Context *context)
{
- TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback();
+ TransformFeedback *xfb = context->getGLState().getCurrentTransformFeedback();
mCachedTransformFeedbackActiveUnpaused = xfb && xfb->isActive() && !xfb->isPaused();
}
} // namespace gl