Add code path that avoids large indexBuffer draws
Benchmark results for bulkrect_1000_random_uniqueimages_batch on a Nexus 6P/Adreno 430:
w/o this CL
curr/maxrss loops min median mean max stddev samples config
304/304 MB 1 151ms 159ms 158ms 163ms 3% ▆█▇▄▆▆▁▂█▅ gles
w/ this CL
curr/maxrss loops min median mean max stddev samples config
286/286 MB 1 18.1ms 18.1ms 18.1ms 18.1ms 0% ▂▄▅▃▅▅▃▄▁█ gles
Change-Id: I0f6d690b953444ec7a3176cb27c8a253caa55f5d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/255986
Commit-Queue: Robert Phillips <robertphillips@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp
index ee2ba50..48dc58c 100644
--- a/src/gpu/GrCaps.cpp
+++ b/src/gpu/GrCaps.cpp
@@ -40,6 +40,7 @@
fDynamicStateArrayGeometryProcessorTextureSupport = false;
fPerformPartialClearsAsDraws = false;
fPerformColorClearsAsDraws = false;
+ fAvoidLargeIndexBufferDraws = false;
fPerformStencilClearsAsDraws = false;
fAllowCoverageCounting = false;
fTransferBufferSupport = false;
@@ -203,6 +204,7 @@
fDynamicStateArrayGeometryProcessorTextureSupport);
writer->appendBool("Use draws for partial clears", fPerformPartialClearsAsDraws);
writer->appendBool("Use draws for color clears", fPerformColorClearsAsDraws);
+ writer->appendBool("Avoid Large IndexBuffer Draws", fAvoidLargeIndexBufferDraws);
writer->appendBool("Use draws for stencil clip clears", fPerformStencilClearsAsDraws);
writer->appendBool("Allow coverage counting shortcuts", fAllowCoverageCounting);
writer->appendBool("Supports transfer buffers", fTransferBufferSupport);
diff --git a/src/gpu/GrCaps.h b/src/gpu/GrCaps.h
index d8be432..d501edc 100644
--- a/src/gpu/GrCaps.h
+++ b/src/gpu/GrCaps.h
@@ -332,6 +332,8 @@
// Many drivers have issues with color clears.
bool performColorClearsAsDraws() const { return fPerformColorClearsAsDraws; }
+ bool avoidLargeIndexBufferDraws() const { return fAvoidLargeIndexBufferDraws; }
+
/// Adreno 4xx devices experience an issue when there are a large number of stencil clip bit
/// clears. The minimal repro steps are not precisely known but drawing a rect with a stencil
/// op instead of using glClear seems to resolve the issue.
@@ -487,6 +489,7 @@
bool fClampToBorderSupport : 1;
bool fPerformPartialClearsAsDraws : 1;
bool fPerformColorClearsAsDraws : 1;
+ bool fAvoidLargeIndexBufferDraws : 1;
bool fPerformStencilClearsAsDraws : 1;
bool fAllowCoverageCounting : 1;
bool fTransferBufferSupport : 1;
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index c25595c..5678ef7 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -3365,6 +3365,12 @@
fPerformStencilClearsAsDraws = true;
}
+ if (ctxInfo.vendor() == kQualcomm_GrGLVendor) {
+ // It appears that all the Adreno GPUs have less than optimal performance when
+ // drawing w/ large index buffers.
+ fAvoidLargeIndexBufferDraws = true;
+ }
+
// This was reproduced on the following configurations:
// - A Galaxy J5 (Adreno 306) running Android 6 with driver 140.0
// - A Nexus 7 2013 (Adreno 320) running Android 5 with driver 104.0
diff --git a/src/gpu/ops/GrFillRectOp.cpp b/src/gpu/ops/GrFillRectOp.cpp
index e213283..0b35528 100644
--- a/src/gpu/ops/GrFillRectOp.cpp
+++ b/src/gpu/ops/GrFillRectOp.cpp
@@ -238,9 +238,9 @@
// Configure the mesh for the vertex data
GrMesh* mesh = target->allocMeshes(1);
- GrQuadPerEdgeAA::ConfigureMesh(mesh, vertexSpec, 0, fQuads.count(), totalNumVertices,
- std::move(vertexBuffer), std::move(indexBuffer),
- vertexOffsetInBuffer);
+ GrQuadPerEdgeAA::ConfigureMesh(target->caps(), mesh, vertexSpec, 0, fQuads.count(),
+ totalNumVertices, std::move(vertexBuffer),
+ std::move(indexBuffer), vertexOffsetInBuffer);
target->recordDraw(gp, mesh, 1, vertexSpec.primitiveType());
}
diff --git a/src/gpu/ops/GrQuadPerEdgeAA.cpp b/src/gpu/ops/GrQuadPerEdgeAA.cpp
index 70a8607..ca0c7c0 100644
--- a/src/gpu/ops/GrQuadPerEdgeAA.cpp
+++ b/src/gpu/ops/GrQuadPerEdgeAA.cpp
@@ -381,7 +381,7 @@
SkUNREACHABLE;
}
-void ConfigureMesh(GrMesh* mesh, const VertexSpec& spec,
+void ConfigureMesh(const GrCaps& caps, GrMesh* mesh, const VertexSpec& spec,
int runningQuadCount, int quadsInDraw, int maxVerts,
sk_sp<const GrBuffer> vertexBuffer,
sk_sp<const GrBuffer> indexBuffer, int absVertBufferOffset) {
@@ -403,28 +403,42 @@
spec.indexBufferOption() == IndexBufferOption::kIndexedRects);
SkASSERT(indexBuffer);
- int baseIndex, numIndicesToDraw;
- int minVertex, maxVertex;
+ int maxNumQuads, numIndicesPerQuad, numVertsPerQuad;
if (spec.indexBufferOption() == IndexBufferOption::kPictureFramed) {
- SkASSERT(runningQuadCount + quadsInDraw <= GrResourceProvider::MaxNumAAQuads());
// AA uses 8 vertices and 30 indices per quad, basically nested rectangles
- baseIndex = runningQuadCount * GrResourceProvider::NumIndicesPerAAQuad();
- numIndicesToDraw = quadsInDraw * GrResourceProvider::NumIndicesPerAAQuad();
- minVertex = runningQuadCount * GrResourceProvider::NumVertsPerAAQuad();
- maxVertex = (runningQuadCount + quadsInDraw) * GrResourceProvider::NumVertsPerAAQuad();
+ maxNumQuads = GrResourceProvider::MaxNumAAQuads();
+ numIndicesPerQuad = GrResourceProvider::NumIndicesPerAAQuad();
+ numVertsPerQuad = GrResourceProvider::NumVertsPerAAQuad();
} else {
- SkASSERT(runningQuadCount + quadsInDraw <= GrResourceProvider::MaxNumNonAAQuads());
// Non-AA uses 4 vertices and 6 indices per quad
- baseIndex = runningQuadCount * GrResourceProvider::NumIndicesPerNonAAQuad();
- numIndicesToDraw = quadsInDraw * GrResourceProvider::NumIndicesPerNonAAQuad();
- minVertex = runningQuadCount * GrResourceProvider::NumVertsPerNonAAQuad();
- maxVertex = (runningQuadCount + quadsInDraw) * GrResourceProvider::NumVertsPerNonAAQuad();
+ maxNumQuads = GrResourceProvider::MaxNumNonAAQuads();
+ numIndicesPerQuad = GrResourceProvider::NumIndicesPerNonAAQuad();
+ numVertsPerQuad = GrResourceProvider::NumVertsPerNonAAQuad();
}
- mesh->setIndexed(std::move(indexBuffer), numIndicesToDraw, baseIndex, minVertex, maxVertex,
- GrPrimitiveRestart::kNo);
- mesh->setVertexData(std::move(vertexBuffer), absVertBufferOffset);
+ SkASSERT(runningQuadCount + quadsInDraw <= maxNumQuads);
+
+ if (caps.avoidLargeIndexBufferDraws()) {
+ // When we need to avoid large index buffer draws we modify the base vertex of the draw
+ // which, in GL, requires rebinding all vertex attrib arrays, so a base index is generally
+ // preferred.
+ int offset = absVertBufferOffset + runningQuadCount * numVertsPerQuad;
+
+ mesh->setIndexedPatterned(std::move(indexBuffer), numIndicesPerQuad,
+ numVertsPerQuad, quadsInDraw, maxNumQuads);
+ mesh->setVertexData(std::move(vertexBuffer), offset);
+ } else {
+ int baseIndex = runningQuadCount * numIndicesPerQuad;
+ int numIndicesToDraw = quadsInDraw * numIndicesPerQuad;
+
+ int minVertex = runningQuadCount * numVertsPerQuad;
+ int maxVertex = (runningQuadCount + quadsInDraw) * numVertsPerQuad;
+
+ mesh->setIndexed(std::move(indexBuffer), numIndicesToDraw,
+ baseIndex, minVertex, maxVertex, GrPrimitiveRestart::kNo);
+ mesh->setVertexData(std::move(vertexBuffer), absVertBufferOffset);
+ }
}
////////////////// VertexSpec Implementation
diff --git a/src/gpu/ops/GrQuadPerEdgeAA.h b/src/gpu/ops/GrQuadPerEdgeAA.h
index 2b99c68..8a77fa7 100644
--- a/src/gpu/ops/GrQuadPerEdgeAA.h
+++ b/src/gpu/ops/GrQuadPerEdgeAA.h
@@ -186,8 +186,8 @@
// @param quadCount the number of quads that will be drawn by the provided 'mesh'.
// A subsequent ConfigureMesh call would then use
// 'runningQuadCount' + 'quadCount' for its new 'runningQuadCount'.
- void ConfigureMesh(GrMesh* mesh, const VertexSpec&, int runningQuadCount, int quadCount,
- int maxVerts, sk_sp<const GrBuffer> vertexBuffer,
+ void ConfigureMesh(const GrCaps&, GrMesh*, const VertexSpec&, int runningQuadCount,
+ int quadCount, int maxVerts, sk_sp<const GrBuffer> vertexBuffer,
sk_sp<const GrBuffer> indexBuffer, int absVertBufferOffset);
} // namespace GrQuadPerEdgeAA
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 47cdbe8..c91db17 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -574,11 +574,11 @@
// At this juncture we only fill in the vertex data and state arrays. Filling in of
// the meshes is left until onPrepareDraws.
- SkAssertResult(FillInData(this, fPrePreparedDesc, fPrePreparedDesc->fVertices,
- nullptr, 0, nullptr, nullptr));
+ SkAssertResult(FillInData(*context->priv().caps(), this, fPrePreparedDesc,
+ fPrePreparedDesc->fVertices, nullptr, 0, nullptr, nullptr));
}
- static bool FillInData(TextureOp* texOp, PrePreparedDesc* desc,
+ static bool FillInData(const GrCaps& caps, TextureOp* texOp, PrePreparedDesc* desc,
char* pVertexData, GrMesh* meshes, int absBufferOffset,
sk_sp<const GrBuffer> vertexBuffer,
sk_sp<const GrBuffer> indexBuffer) {
@@ -611,7 +611,7 @@
}
if (meshes) {
- GrQuadPerEdgeAA::ConfigureMesh(&(meshes[meshIndex]), desc->fVertexSpec,
+ GrQuadPerEdgeAA::ConfigureMesh(caps, &(meshes[meshIndex]), desc->fVertexSpec,
totQuadsSeen, quadCnt, desc->totalNumVertices(),
vertexBuffer, indexBuffer, absBufferOffset);
}
@@ -793,12 +793,12 @@
memcpy(vdata, desc.fVertices, desc.totalSizeInBytes());
// The above memcpy filled in the vertex data - just call FillInData to fill in the
// mesh data
- result = FillInData(this, &desc, nullptr, meshes, vertexOffsetInBuffer,
+ result = FillInData(target->caps(), this, &desc, nullptr, meshes, vertexOffsetInBuffer,
std::move(vbuffer), std::move(indexBuffer));
} else {
// Fills in both vertex data and mesh data
- result = FillInData(this, &desc, (char*) vdata, meshes, vertexOffsetInBuffer,
- std::move(vbuffer), std::move(indexBuffer));
+ result = FillInData(target->caps(), this, &desc, (char*) vdata, meshes,
+ vertexOffsetInBuffer, std::move(vbuffer), std::move(indexBuffer));
}
if (!result) {