Speed up the fixed count parametric/radial sort
The fixed-count stroke shader was executing a sort loop long enough to
handle 1024 (2^10) parametric segments, when in reality it never sees
more than 48. This CL cuts the per-vertex sorting work nearly in half
by reducing the number of iterations from 10 to 6, i.e. ceil(log2(48)).
(The indirect tessellator continues to use 10 iterations.)
Bug: skia:10496
Change-Id: Idc21d6015d449f63915780972a8d487c9c6de6fe
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/412496
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp
index 5cc70d8..fc8ad69 100644
--- a/bench/TessellateBench.cpp
+++ b/bench/TessellateBench.cpp
@@ -211,21 +211,24 @@
}
using PathStrokeList = GrStrokeTessellator::PathStrokeList;
-using MakeTessellatorFn = std::unique_ptr<GrStrokeTessellator>(*)(ShaderFlags, const SkMatrix&,
- PathStrokeList*,
+using MakeTessellatorFn = std::unique_ptr<GrStrokeTessellator>(*)(ShaderFlags, const GrShaderCaps&,
+ const SkMatrix&, PathStrokeList*,
std::array<float, 2>, const
SkRect&);
static std::unique_ptr<GrStrokeTessellator> make_hw_tessellator(
- ShaderFlags shaderFlags, const SkMatrix& viewMatrix, PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales, const SkRect& strokeCullBounds) {
- return std::make_unique<GrStrokeHardwareTessellator>(shaderFlags, viewMatrix, pathStrokeList,
- matrixMinMaxScales, strokeCullBounds);
+ ShaderFlags shaderFlags, const GrShaderCaps& shaderCaps, const SkMatrix& viewMatrix,
+ PathStrokeList* pathStrokeList, std::array<float, 2> matrixMinMaxScales,
+ const SkRect& strokeCullBounds) {
+ return std::make_unique<GrStrokeHardwareTessellator>(shaderFlags, shaderCaps, viewMatrix,
+ pathStrokeList, matrixMinMaxScales,
+ strokeCullBounds);
}
static std::unique_ptr<GrStrokeTessellator> make_fixed_count_tessellator(
- ShaderFlags shaderFlags, const SkMatrix& viewMatrix, PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales, const SkRect& strokeCullBounds) {
+ ShaderFlags shaderFlags, const GrShaderCaps&, const SkMatrix& viewMatrix,
+ PathStrokeList* pathStrokeList, std::array<float, 2> matrixMinMaxScales,
+ const SkRect& strokeCullBounds) {
return std::make_unique<GrStrokeFixedCountTessellator>(shaderFlags, viewMatrix, pathStrokeList,
matrixMinMaxScales, strokeCullBounds);
}
@@ -327,7 +330,8 @@
fTotalVerbCount += fPathStrokes[i].fPath.countVerbs();
}
- fTessellator = fMakeTessellatorFn(fShaderFlags, SkMatrix::Scale(fMatrixScale, fMatrixScale),
+ fTessellator = fMakeTessellatorFn(fShaderFlags, *fTarget->caps().shaderCaps(),
+ SkMatrix::Scale(fMatrixScale, fMatrixScale),
fPathStrokes.data(), {fMatrixScale, fMatrixScale},
SkRectPriv::MakeLargest());
}
diff --git a/src/gpu/tessellate/GrStrokeFixedCountTessellator.cpp b/src/gpu/tessellate/GrStrokeFixedCountTessellator.cpp
index a17bdb4..e8865a3 100644
--- a/src/gpu/tessellate/GrStrokeFixedCountTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeFixedCountTessellator.cpp
@@ -16,6 +16,7 @@
namespace {
constexpr static float kMaxParametricSegments_pow4 = 48*48*48*48; // 48^4
+constexpr static int8_t kMaxParametricSegments_log2 = 6; // ceil(log2(48))
// Writes out strokes to the given instance chunk array, chopping if necessary so that all instances
// require 48 parametric segments or less. (We don't consider radial segments here. The tessellator
@@ -233,6 +234,16 @@
} // namespace
+GrStrokeFixedCountTessellator::GrStrokeFixedCountTessellator(ShaderFlags shaderFlags,
+ const SkMatrix& viewMatrix,
+ PathStrokeList* pathStrokeList,
+ std::array<float,2> matrixMinMaxScales,
+ const SkRect& strokeCullBounds)
+ : GrStrokeTessellator(GrStrokeShader::Mode::kFixedCount, shaderFlags,
+ kMaxParametricSegments_log2, viewMatrix, pathStrokeList,
+ matrixMinMaxScales, strokeCullBounds) {
+}
+
void GrStrokeFixedCountTessellator::prepare(GrMeshDrawOp::Target* target,
int totalCombinedVerbCnt) {
int maxEdgesInJoin = 0;
diff --git a/src/gpu/tessellate/GrStrokeFixedCountTessellator.h b/src/gpu/tessellate/GrStrokeFixedCountTessellator.h
index aeabbd3..ce708f8 100644
--- a/src/gpu/tessellate/GrStrokeFixedCountTessellator.h
+++ b/src/gpu/tessellate/GrStrokeFixedCountTessellator.h
@@ -15,14 +15,9 @@
// instance are emitted as degenerate triangles.
class GrStrokeFixedCountTessellator : public GrStrokeTessellator {
public:
- GrStrokeFixedCountTessellator(ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
- PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales,
- const SkRect& strokeCullBounds)
- : GrStrokeTessellator(GrStrokeShader::Mode::kFixedCount, shaderFlags,
- viewMatrix, pathStrokeList, matrixMinMaxScales,
- strokeCullBounds) {
- }
+ GrStrokeFixedCountTessellator(ShaderFlags, const SkMatrix&, PathStrokeList*,
+ std::array<float,2> matrixMinMaxScales,
+ const SkRect& strokeCullBounds);
void prepare(GrMeshDrawOp::Target*, int totalCombinedVerbCnt) override;
void draw(GrOpFlushState*) const override;
diff --git a/src/gpu/tessellate/GrStrokeHardwareTessellator.h b/src/gpu/tessellate/GrStrokeHardwareTessellator.h
index 2c61fef..3c4b19d 100644
--- a/src/gpu/tessellate/GrStrokeHardwareTessellator.h
+++ b/src/gpu/tessellate/GrStrokeHardwareTessellator.h
@@ -16,13 +16,13 @@
// MSAA if antialiasing is desired.
class GrStrokeHardwareTessellator : public GrStrokeTessellator {
public:
- GrStrokeHardwareTessellator(ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
- PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales,
- const SkRect& strokeCullBounds)
- : GrStrokeTessellator(GrStrokeShader::Mode::kHardwareTessellation,
- shaderFlags, viewMatrix, pathStrokeList, matrixMinMaxScales,
- strokeCullBounds) {
+ GrStrokeHardwareTessellator(ShaderFlags shaderFlags, const GrShaderCaps& shaderCaps,
+ const SkMatrix& viewMatrix, PathStrokeList* pathStrokeList,
+ std::array<float,2> matrixMinMaxScales, const SkRect&
+ strokeCullBounds)
+ : GrStrokeTessellator(GrStrokeShader::Mode::kHardwareTessellation, shaderFlags,
+ SkNextLog2(shaderCaps.maxTessellationSegments()), viewMatrix,
+ pathStrokeList, matrixMinMaxScales, strokeCullBounds) {
}
void prepare(GrMeshDrawOp::Target*, int totalCombinedVerbCnt) override;
diff --git a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
index ec5e60a..d49861e 100644
--- a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
@@ -66,7 +66,7 @@
public:
constexpr static int8_t kMaxResolveLevel = GrStrokeIndirectTessellator::kMaxResolveLevel;
- ResolveLevelCounter(int* resolveLevelCounts, std::array<float, 2> matrixMinMaxScales)
+ ResolveLevelCounter(int* resolveLevelCounts, std::array<float,2> matrixMinMaxScales)
: fResolveLevelCounts(resolveLevelCounts), fMatrixMinMaxScales(matrixMinMaxScales) {
}
@@ -426,7 +426,7 @@
#endif
int* const fResolveLevelCounts;
- std::array<float, 2> fMatrixMinMaxScales;
+ std::array<float,2> fMatrixMinMaxScales;
GrStrokeTolerances fTolerances;
int fResolveLevelForCircles;
bool fIsRoundJoin;
@@ -434,15 +434,20 @@
} // namespace
+// GrTessellationPathRenderer crops paths that require more than 2^kMaxResolveLevel parametric
+// segments.
+constexpr static int8_t kMaxParametricSegments_log2 = GrTessellationPathRenderer::kMaxResolveLevel;
+
GrStrokeIndirectTessellator::GrStrokeIndirectTessellator(ShaderFlags shaderFlags,
const SkMatrix& viewMatrix,
PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales,
+ std::array<float,2> matrixMinMaxScales,
const SkRect& strokeCullBounds,
int totalCombinedVerbCnt,
SkArenaAlloc* alloc)
- : GrStrokeTessellator(GrStrokeShader::Mode::kLog2Indirect, shaderFlags, viewMatrix,
- pathStrokeList, matrixMinMaxScales, strokeCullBounds) {
+ : GrStrokeTessellator(GrStrokeShader::Mode::kLog2Indirect, shaderFlags,
+ kMaxParametricSegments_log2, viewMatrix, pathStrokeList,
+ matrixMinMaxScales, strokeCullBounds) {
// The maximum potential number of values we will need in fResolveLevels is:
//
// * 3 segments per verb (from two chops)
diff --git a/src/gpu/tessellate/GrStrokeIndirectTessellator.h b/src/gpu/tessellate/GrStrokeIndirectTessellator.h
index b3cdb27..ce481dc 100644
--- a/src/gpu/tessellate/GrStrokeIndirectTessellator.h
+++ b/src/gpu/tessellate/GrStrokeIndirectTessellator.h
@@ -23,7 +23,7 @@
constexpr static int8_t kMaxResolveLevel = 15;
GrStrokeIndirectTessellator(ShaderFlags, const SkMatrix& viewMatrix, PathStrokeList*,
- std::array<float, 2> matrixMinMaxScales,
+ std::array<float,2> matrixMinMaxScales,
const SkRect& strokeCullBounds, int totalCombinedVerbCnt,
SkArenaAlloc*);
diff --git a/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp b/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
index 6a06059..10f74ff 100644
--- a/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
+++ b/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
@@ -18,9 +18,6 @@
SkPaint::Join joinType = shader.stroke().getJoin();
args.fVaryingHandler->emitAttributes(shader);
- // Constants.
- args.fVertBuilder->defineConstant("MAX_PARAMETRIC_SEGMENTS_LOG2",
- GrTessellationPathRenderer::kMaxResolveLevel);
args.fVertBuilder->defineConstant("float", "PI", "3.141592653589793238");
// Helper functions.
@@ -108,9 +105,7 @@
args.fVertBuilder->codeAppend(R"(
// Find how many parametric segments this stroke requires.
- float numParametricSegments = min(wangs_formula(PARAMETRIC_PRECISION,
- P[0], P[1], P[2], P[3], w),
- float(1 << MAX_PARAMETRIC_SEGMENTS_LOG2));
+ float numParametricSegments = wangs_formula(PARAMETRIC_PRECISION, P[0], P[1], P[2], P[3], w);
if (P[0] == P[1] && P[2] == P[3]) {
// This is how we describe lines, but Wang's formula does not return 1 in this case.
numParametricSegments = 1;
diff --git a/src/gpu/tessellate/GrStrokeShader.cpp b/src/gpu/tessellate/GrStrokeShader.cpp
index e0e6428..78d66f4 100644
--- a/src/gpu/tessellate/GrStrokeShader.cpp
+++ b/src/gpu/tessellate/GrStrokeShader.cpp
@@ -89,7 +89,7 @@
// float angle0;
// float strokeOutset;
//
- code->append(R"(
+ code->appendf(R"(
float2 tangent, strokeCoord;
if (combinedEdgeID != 0 && !isFinalEdge) {
// Compute the location and tangent direction of the stroke edge with the integral id
@@ -138,7 +138,7 @@
float2 tan0norm = normalize(tan0);
float negAbsRadsPerSegment = -abs(radsPerSegment);
float maxRotation0 = (1.0 + combinedEdgeID) * abs(radsPerSegment);
- for (int exp = MAX_PARAMETRIC_SEGMENTS_LOG2 - 1; exp >= 0; --exp) {
+ for (int exp = %i - 1; exp >= 0; --exp) {
// Test the parametric edge at lastParametricEdgeID + 2^exp.
float testParametricID = lastParametricEdgeID + float(1 << exp);
if (testParametricID <= maxParametricEdgeID) {
@@ -229,7 +229,7 @@
// ensures crack-free seaming between instances.
tangent = (combinedEdgeID == 0) ? tan0 : tan1;
strokeCoord = (combinedEdgeID == 0) ? P[0] : P[3];
- })");
+ })", shader.maxParametricSegments_log2() /* Parametric/radial sort loop count. */);
code->append(R"(
float2 ortho = normalize(float2(tangent.y, -tangent.x));
@@ -322,6 +322,7 @@
key = (key << 2) | (uint32_t)fMode;
key = (key << 2) | ((keyNeedsJoin) ? fStroke.getJoin() : 0);
key = (key << 1) | (uint32_t)fStroke.isHairlineStyle();
+ key = (key << 8) | fMaxParametricSegments_log2;
b->add32(key);
}
diff --git a/src/gpu/tessellate/GrStrokeShader.h b/src/gpu/tessellate/GrStrokeShader.h
index 24adb89..00b732f 100644
--- a/src/gpu/tessellate/GrStrokeShader.h
+++ b/src/gpu/tessellate/GrStrokeShader.h
@@ -26,13 +26,13 @@
class GrStrokeShader : public GrPathShader {
public:
// Are we using hardware tessellation or indirect draws?
- enum class Mode {
+ enum class Mode : int8_t {
kHardwareTessellation,
kLog2Indirect,
kFixedCount
};
- enum class ShaderFlags {
+ enum class ShaderFlags : uint8_t {
kNone = 0,
kWideColor = 1 << 0,
kDynamicStroke = 1 << 1, // Each patch or instance has its own stroke width and join type.
@@ -98,14 +98,15 @@
};
// 'viewMatrix' is applied to the geometry post tessellation. It cannot have perspective.
- GrStrokeShader(Mode mode, ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
- const SkStrokeRec& stroke, SkPMColor4f color)
+ GrStrokeShader(Mode mode, ShaderFlags shaderFlags, int8_t maxParametricSegments_log2,
+ const SkMatrix& viewMatrix, const SkStrokeRec& stroke, SkPMColor4f color)
: GrPathShader(kTessellate_GrStrokeShader_ClassID, viewMatrix,
(mode == Mode::kHardwareTessellation) ?
GrPrimitiveType::kPatches : GrPrimitiveType::kTriangleStrip,
(mode == Mode::kHardwareTessellation) ? 1 : 0)
, fMode(mode)
, fShaderFlags(shaderFlags)
+ , fMaxParametricSegments_log2(maxParametricSegments_log2)
, fStroke(stroke)
, fColor(color) {
if (fMode == Mode::kHardwareTessellation) {
@@ -167,6 +168,7 @@
Mode mode() const { return fMode; }
ShaderFlags flags() const { return fShaderFlags; }
+ int8_t maxParametricSegments_log2() const { return fMaxParametricSegments_log2; }
bool hasDynamicStroke() const { return fShaderFlags & ShaderFlags::kDynamicStroke; }
bool hasDynamicColor() const { return fShaderFlags & ShaderFlags::kDynamicColor; }
const SkStrokeRec& stroke() const { return fStroke;}
@@ -187,6 +189,7 @@
const Mode fMode;
const ShaderFlags fShaderFlags;
+ const int8_t fMaxParametricSegments_log2;
const SkStrokeRec fStroke;
const SkPMColor4f fColor;
diff --git a/src/gpu/tessellate/GrStrokeTessellateOp.cpp b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
index 8e2f881..c97a374 100644
--- a/src/gpu/tessellate/GrStrokeTessellateOp.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
@@ -196,8 +196,8 @@
if (this->canUseHardwareTessellation(fTotalCombinedVerbCnt, caps)) {
// Only use hardware tessellation if we're drawing a somewhat large number of verbs.
// Otherwise we seem to be better off using instanced draws.
- fTessellator = arena->make<GrStrokeHardwareTessellator>(fShaderFlags, fViewMatrix,
- &fPathStrokeList,
+ fTessellator = arena->make<GrStrokeHardwareTessellator>(fShaderFlags, *caps.shaderCaps(),
+ fViewMatrix, &fPathStrokeList,
matrixMinMaxScales,
strokeCullBounds);
} else if (fTotalCombinedVerbCnt > 50 && !(fShaderFlags & ShaderFlags::kDynamicColor)) {
diff --git a/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp b/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
index 9817aee..969932f 100644
--- a/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
@@ -540,8 +540,6 @@
code.appendf("#define float3x2 mat3x2\n");
code.appendf("#define float4x2 mat4x2\n");
code.appendf("#define PI 3.141592653589793238\n");
- code.appendf("#define MAX_PARAMETRIC_SEGMENTS_LOG2 %i\n",
- SkNextLog2(shaderCaps.maxTessellationSegments()));
if (!shader.hasDynamicStroke()) {
const char* tessArgsName = uniformHandler.getUniformCStr(fTessControlArgsUniform);
diff --git a/src/gpu/tessellate/GrStrokeTessellator.h b/src/gpu/tessellate/GrStrokeTessellator.h
index f1e493c..4be1e36 100644
--- a/src/gpu/tessellate/GrStrokeTessellator.h
+++ b/src/gpu/tessellate/GrStrokeTessellator.h
@@ -26,10 +26,11 @@
};
GrStrokeTessellator(GrStrokeShader::Mode shaderMode, ShaderFlags shaderFlags,
- const SkMatrix& viewMatrix, PathStrokeList* pathStrokeList,
- std::array<float, 2> matrixMinMaxScales, const SkRect& strokeCullBounds)
- : fShader(shaderMode, shaderFlags, viewMatrix, pathStrokeList->fStroke,
- pathStrokeList->fColor)
+ int8_t maxParametricSegments_log2, const SkMatrix& viewMatrix,
+ PathStrokeList* pathStrokeList, std::array<float,2> matrixMinMaxScales,
+ const SkRect& strokeCullBounds)
+ : fShader(shaderMode, shaderFlags, maxParametricSegments_log2, viewMatrix,
+ pathStrokeList->fStroke, pathStrokeList->fColor)
, fPathStrokeList(pathStrokeList)
, fMatrixMinMaxScales(matrixMinMaxScales)
, fStrokeCullBounds(strokeCullBounds) {
@@ -49,7 +50,7 @@
protected:
GrStrokeShader fShader;
PathStrokeList* fPathStrokeList;
- const std::array<float, 2> fMatrixMinMaxScales;
+ const std::array<float,2> fMatrixMinMaxScales;
const SkRect fStrokeCullBounds; // See SkStrokeRec::inflationRadius.
};