Handle tessellated paths that require more segments than are supported
Adds a method to determine the worst-case number of tessellated line
segments that a path might require, and disables hardware tessellation
if that number exceeds what the hardware supports (falling back on
indirect draw shaders).
If the path requires even more segments than are supported by the
indirect draw shaders (1024), we crop the path to the viewport. The
required number of segments is proportional to the square root of the
bounding box's diagonal, so we won't start cropping paths until their
device-space bounding box diagonal is nearly 175,000 pixels long.
Change-Id: I8a9435e70bb93dda3464cc11a3e44fbe511744ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/293691
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/bench/TessellatePathBench.cpp b/bench/TessellatePathBench.cpp
index d22fe2b..b52c100 100644
--- a/bench/TessellatePathBench.cpp
+++ b/bench/TessellatePathBench.cpp
@@ -37,7 +37,7 @@
BenchmarkTarget() {
GrMockOptions mockOptions;
mockOptions.fDrawInstancedSupport = true;
- mockOptions.fTessellationSupport = true;
+ mockOptions.fMaxTessellationSegments = 64;
mockOptions.fMapBufferFlags = GrCaps::kCanMap_MapFlag;
mockOptions.fConfigOptions[(int)GrColorType::kAlpha_8].fRenderability =
GrMockOptions::ConfigOptions::Renderability::kMSAA;
@@ -110,7 +110,7 @@
class GrTessellatePathOp::TestingOnly_Benchmark : public Benchmark {
public:
TestingOnly_Benchmark(const char* subName, SkPath path, const SkMatrix& m)
- : fOp(m, path, GrPaint(), GrAAType::kMSAA) {
+ : fOp(m, path, GrPaint(), GrAAType::kMSAA, GrTessellationPathRenderer::OpFlags::kNone) {
fName.printf("tessellate_%s", subName);
}
diff --git a/include/gpu/mock/GrMockTypes.h b/include/gpu/mock/GrMockTypes.h
index b4c55c8..897a831 100644
--- a/include/gpu/mock/GrMockTypes.h
+++ b/include/gpu/mock/GrMockTypes.h
@@ -110,12 +110,12 @@
// GrCaps options.
bool fMipMapSupport = false;
bool fDrawInstancedSupport = false;
- bool fTessellationSupport = false;
bool fHalfFloatVertexAttributeSupport = false;
uint32_t fMapBufferFlags = 0;
int fMaxTextureSize = 2048;
int fMaxRenderTargetSize = 2048;
int fMaxVertexAttributes = 16;
+ int fMaxTessellationSegments = 0;
ConfigOptions fConfigOptions[kGrColorTypeCnt];
ConfigOptions fCompressedOptions[SkImage::kCompressionTypeCount];
diff --git a/samplecode/SampleTessellatedWedge.cpp b/samplecode/SampleTessellatedWedge.cpp
index 89731cb..0598b1f 100644
--- a/samplecode/SampleTessellatedWedge.cpp
+++ b/samplecode/SampleTessellatedWedge.cpp
@@ -51,7 +51,7 @@
SkMatrix fLastViewMatrix = SkMatrix::I();
SkPath fPath;
- GrTessellatePathOp::Flags fFlags = GrTessellatePathOp::Flags::kWireframe;
+ GrTessellationPathRenderer::OpFlags fOpFlags = GrTessellationPathRenderer::OpFlags::kWireframe;
class Click;
};
@@ -92,7 +92,7 @@
GrOpMemoryPool* pool = ctx->priv().opMemoryPool();
rtc->priv().testingOnly_addDrawOp(pool->allocate<GrTessellatePathOp>(
- canvas->getTotalMatrix(), fPath, std::move(paint), aa, fFlags));
+ canvas->getTotalMatrix(), fPath, std::move(paint), aa, fOpFlags));
// Draw the path points.
SkPaint pointsPaint;
@@ -149,8 +149,8 @@
bool TessellatedWedge::onChar(SkUnichar unichar) {
switch (unichar) {
case 'w':
- fFlags = (GrTessellatePathOp::Flags)(
- (int)fFlags ^ (int)GrTessellatePathOp::Flags::kWireframe);
+ fOpFlags = (GrTessellationPathRenderer::OpFlags)(
+ (int)fOpFlags ^ (int)GrTessellationPathRenderer::OpFlags::kWireframe);
return true;
case 'D': {
fPath.dump();
diff --git a/src/gpu/GrDynamicAtlas.h b/src/gpu/GrDynamicAtlas.h
index 68b2e0e..e96ffe8 100644
--- a/src/gpu/GrDynamicAtlas.h
+++ b/src/gpu/GrDynamicAtlas.h
@@ -54,6 +54,7 @@
void reset(SkISize initialSize, const GrCaps& desc);
+ int maxAtlasSize() const { return fMaxAtlasSize; }
GrTextureProxy* textureProxy() const { return fTextureProxy.get(); }
bool isInstantiated() const { return fTextureProxy->isInstantiated(); }
int currentWidth() const { return fWidth; }
diff --git a/src/gpu/GrShaderCaps.cpp b/src/gpu/GrShaderCaps.cpp
index 78bcab3..a629085 100644
--- a/src/gpu/GrShaderCaps.cpp
+++ b/src/gpu/GrShaderCaps.cpp
@@ -49,7 +49,6 @@
fPreferFlatInterpolation = false;
fNoPerspectiveInterpolationSupport = false;
fSampleMaskSupport = false;
- fTessellationSupport = false;
fExternalTextureSupport = false;
fVertexIDSupport = false;
fFPManipulationSupport = false;
@@ -77,6 +76,7 @@
fFBFetchColorName = nullptr;
fFBFetchExtensionString = nullptr;
fMaxFragmentSamplers = 0;
+ fMaxTessellationSegments = 0;
fAdvBlendEqInteraction = kNotSupported_AdvBlendEqInteraction;
}
@@ -133,7 +133,6 @@
writer->appendBool("Prefer flat interpolation", fPreferFlatInterpolation);
writer->appendBool("No perspective interpolation support", fNoPerspectiveInterpolationSupport);
writer->appendBool("Sample mask support", fSampleMaskSupport);
- writer->appendBool("Tessellation Support", fTessellationSupport);
writer->appendBool("External texture support", fExternalTextureSupport);
writer->appendBool("sk_VertexID support", fVertexIDSupport);
writer->appendBool("Floating point manipulation support", fFPManipulationSupport);
@@ -146,6 +145,7 @@
writer->appendBool("Can use do-while loops", fCanUseDoLoops);
writer->appendS32("Max FS Samplers", fMaxFragmentSamplers);
+ writer->appendS32("Max Tessellation Segments", fMaxTessellationSegments);
writer->appendString("Advanced blend equation interaction",
kAdvBlendEqInteractionStr[fAdvBlendEqInteraction]);
@@ -185,7 +185,7 @@
fGeometryShaderSupport = false;
}
if (options.fSuppressTessellationShaders) {
- fTessellationSupport = false;
+ fMaxTessellationSegments = 0;
}
#endif
}
diff --git a/src/gpu/GrShaderCaps.h b/src/gpu/GrShaderCaps.h
index 84e6372..3b3e1bf 100644
--- a/src/gpu/GrShaderCaps.h
+++ b/src/gpu/GrShaderCaps.h
@@ -72,8 +72,6 @@
bool sampleMaskSupport() const { return fSampleMaskSupport; }
- bool tessellationSupport() const { return fTessellationSupport; }
-
bool externalTextureSupport() const { return fExternalTextureSupport; }
bool vertexIDSupport() const { return fVertexIDSupport; }
@@ -248,6 +246,11 @@
int maxFragmentSamplers() const { return fMaxFragmentSamplers; }
+ // Maximum number of segments a tessellation edge can be divided into.
+ int maxTessellationSegments() const { return fMaxTessellationSegments; }
+
+ bool tessellationSupport() const { return SkToBool(fMaxTessellationSegments);}
+
bool textureSwizzleAppliedInShader() const { return fTextureSwizzleAppliedInShader; }
GrGLSLGeneration generation() const { return fGLSLGeneration; }
@@ -271,7 +274,6 @@
bool fPreferFlatInterpolation : 1;
bool fNoPerspectiveInterpolationSupport : 1;
bool fSampleMaskSupport : 1;
- bool fTessellationSupport : 1;
bool fExternalTextureSupport : 1;
bool fVertexIDSupport : 1;
bool fFPManipulationSupport : 1;
@@ -324,6 +326,7 @@
const char* fFBFetchExtensionString;
int fMaxFragmentSamplers;
+ int fMaxTessellationSegments;
AdvBlendEqInteraction fAdvBlendEqInteraction;
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index 2630173..94721a5 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -886,17 +886,21 @@
}
}
+ bool hasTessellationSupport = false;
if (GR_IS_GR_GL(standard)) {
- shaderCaps->fTessellationSupport =
- version >= GR_GL_VER(4,0) ||
- ctxInfo.hasExtension("GL_ARB_tessellation_shader");
- } else {
- if (version >= GR_GL_VER(3,2)) {
- shaderCaps->fTessellationSupport = true;
- } else if (ctxInfo.hasExtension("GL_OES_tessellation_shader")) {
- shaderCaps->fTessellationSupport = true;
- shaderCaps->fTessellationExtensionString = "GL_OES_tessellation_shader";
- }
+ hasTessellationSupport = version >= GR_GL_VER(4,0) ||
+ ctxInfo.hasExtension("GL_ARB_tessellation_shader");
+ } else if (version >= GR_GL_VER(3,2)) {
+ hasTessellationSupport = true;
+ } else if (ctxInfo.hasExtension("GL_OES_tessellation_shader")) {
+ hasTessellationSupport = true;
+ shaderCaps->fTessellationExtensionString = "GL_OES_tessellation_shader";
+ }
+ if (hasTessellationSupport) {
+ GR_GL_GetIntegerv(gli, GR_GL_MAX_TESS_GEN_LEVEL_OES,
+ &shaderCaps->fMaxTessellationSegments);
+ // Just in case a driver returns a negative number?
+ shaderCaps->fMaxTessellationSegments = std::max(0, shaderCaps->fMaxTessellationSegments);
}
shaderCaps->fVersionDeclString = get_glsl_version_decl_string(standard,
@@ -3850,14 +3854,14 @@
if (ctxInfo.version() >= GR_GL_VER(4,2)) {
fRequiresManualFBBarrierAfterTessellatedStencilDraw = true;
} else {
- shaderCaps->fTessellationSupport = false;
+ shaderCaps->fMaxTessellationSegments = 0;
}
} else {
// glMemoryBarrier wasn't around until es version 3.1.
if (ctxInfo.version() >= GR_GL_VER(3,1)) {
fRequiresManualFBBarrierAfterTessellatedStencilDraw = true;
} else {
- shaderCaps->fTessellationSupport = false;
+ shaderCaps->fMaxTessellationSegments = 0;
}
}
}
@@ -3865,7 +3869,7 @@
if (kQualcomm_GrGLDriver == ctxInfo.driver()) {
// Qualcomm fails to link programs with tessellation and does not give an error message.
// http://skbug.com/9740
- shaderCaps->fTessellationSupport = false;
+ shaderCaps->fMaxTessellationSegments = 0;
}
#ifdef SK_BUILD_FOR_WIN
diff --git a/src/gpu/gl/GrGLDefines.h b/src/gpu/gl/GrGLDefines.h
index a2ac315..8fde6be 100644
--- a/src/gpu/gl/GrGLDefines.h
+++ b/src/gpu/gl/GrGLDefines.h
@@ -1140,4 +1140,7 @@
/** GL_NV_fence_sync */
#define GR_GL_ALL_COMPLETED 0x84F2
+/* Tessellation */
+#define GR_GL_MAX_TESS_GEN_LEVEL_OES 0x8E7E
+
#endif
diff --git a/src/gpu/mock/GrMockCaps.h b/src/gpu/mock/GrMockCaps.h
index 42a9714..43821d1 100644
--- a/src/gpu/mock/GrMockCaps.h
+++ b/src/gpu/mock/GrMockCaps.h
@@ -35,7 +35,7 @@
fShaderCaps->fShaderDerivativeSupport = options.fShaderDerivativeSupport;
fShaderCaps->fDualSourceBlendingSupport = options.fDualSourceBlendingSupport;
fShaderCaps->fSampleMaskSupport = true;
- fShaderCaps->fTessellationSupport = options.fTessellationSupport;
+ fShaderCaps->fMaxTessellationSegments = options.fMaxTessellationSegments;
this->finishInitialization(contextOptions);
}
diff --git a/src/gpu/tessellate/GrResolveLevelCounter.h b/src/gpu/tessellate/GrResolveLevelCounter.h
index c74f46c..66b0540 100644
--- a/src/gpu/tessellate/GrResolveLevelCounter.h
+++ b/src/gpu/tessellate/GrResolveLevelCounter.h
@@ -22,7 +22,7 @@
SkDEBUGCODE(fHasCalledReset = true;)
}
- int reset(const SkPath& path, const SkMatrix& viewMatrix, float intolerance) {
+ void reset(const SkPath& path, const SkMatrix& viewMatrix, float intolerance) {
this->reset();
GrVectorXform xform(viewMatrix);
for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
@@ -38,7 +38,6 @@
break;
}
}
- return fTotalCubicInstanceCount;
}
void countCubic(int resolveLevel) {
diff --git a/src/gpu/tessellate/GrStencilPathShader.h b/src/gpu/tessellate/GrStencilPathShader.h
index c331726..b3024aa 100644
--- a/src/gpu/tessellate/GrStencilPathShader.h
+++ b/src/gpu/tessellate/GrStencilPathShader.h
@@ -9,6 +9,7 @@
#define GrStencilPathShader_DEFINED
#include "src/gpu/tessellate/GrPathShader.h"
+#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
// This is the base class for shaders that stencil path elements, namely, triangles, standalone
// cubics, and wedges.
@@ -84,7 +85,7 @@
public:
// Each resolveLevel linearizes the curve into 2^resolveLevel line segments. The finest
// supported resolveLevel is therefore 2^12=4096 line segments.
- constexpr static int kMaxResolveLevel = 12;
+ constexpr static int kMaxResolveLevel = GrTessellationPathRenderer::kMaxResolveLevel;
// How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel
// line segments?
diff --git a/src/gpu/tessellate/GrTessellatePathOp.cpp b/src/gpu/tessellate/GrTessellatePathOp.cpp
index 1889ad2..4d028b3 100644
--- a/src/gpu/tessellate/GrTessellatePathOp.cpp
+++ b/src/gpu/tessellate/GrTessellatePathOp.cpp
@@ -16,9 +16,14 @@
#include "src/gpu/tessellate/GrMidpointContourParser.h"
#include "src/gpu/tessellate/GrResolveLevelCounter.h"
#include "src/gpu/tessellate/GrStencilPathShader.h"
+#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
-constexpr static int kMaxResolveLevel = GrMiddleOutCubicShader::kMaxResolveLevel;
-constexpr static float kTessellationIntolerance = 4; // 1/4 of a pixel.
+constexpr static float kLinearizationIntolerance =
+ GrTessellationPathRenderer::kLinearizationIntolerance;
+
+constexpr static int kMaxResolveLevel = GrTessellationPathRenderer::kMaxResolveLevel;
+
+using OpFlags = GrTessellationPathRenderer::OpFlags;
GrTessellatePathOp::FixedFunctionFlags GrTessellatePathOp::fixedFunctionFlags() const {
auto flags = FixedFunctionFlags::kUsesStencil;
@@ -61,11 +66,8 @@
// mode is to maximize GPU performance, and the middle-out topology used by our indirect
// draws is easier on the rasterizer than a tessellated fan. There also seems to be a
// small amount of fixed tessellation overhead that this avoids.
- //
- // NOTE: This will count fewer cubics than above if it discards any for resolveLevel=0.
GrResolveLevelCounter resolveLevelCounter;
- numCountedCubics = resolveLevelCounter.reset(fPath, fViewMatrix,
- kTessellationIntolerance);
+ resolveLevelCounter.reset(fPath, fViewMatrix, kLinearizationIntolerance);
this->prepareIndirectOuterCubics(flushState, resolveLevelCounter);
return;
}
@@ -75,8 +77,7 @@
// that contains both the inner triangles and the outer cubics, instead of using hardware
// tessellation. Also take this path if tessellation is not supported.
bool drawTrianglesAsIndirectCubicDraw = (numVerbs < 50);
- if (drawTrianglesAsIndirectCubicDraw ||
- !flushState->caps().shaderCaps()->tessellationSupport()) {
+ if (drawTrianglesAsIndirectCubicDraw || (fOpFlags & OpFlags::kDisableHWTessellation)) {
// Prepare outer cubics with indirect draws.
GrResolveLevelCounter resolveLevelCounter;
this->prepareMiddleOutTrianglesAndCubics(flushState, &resolveLevelCounter,
@@ -84,6 +85,9 @@
return;
}
+ // The caller should have sent OpFlags::kDisableHWTessellation if it was not supported.
+ SkASSERT(flushState->caps().shaderCaps()->tessellationSupport());
+
// Next see if we can split up the inner triangles and outer cubics into two draw calls. This
// allows for a more efficient inner triangle topology that can reduce the rasterizer load by a
// large margin on complex paths, but also causes greater CPU overhead due to the extra shader
@@ -116,14 +120,15 @@
// simple.
return false;
}
- if (((Flags::kStencilOnly | Flags::kWireframe) & fFlags) || GrAAType::kCoverage == fAAType ||
+ if (((OpFlags::kStencilOnly | OpFlags::kWireframe) & fOpFlags) ||
+ GrAAType::kCoverage == fAAType ||
(target->appliedClip() && target->appliedClip()->hasStencilClip())) {
// If we have certain flags, mixed samples, or a stencil clip then we unfortunately
// can't fill the inner polygon directly. Indicate that these triangles need to be
// stencilled.
fDoStencilTriangleBuffer = true;
}
- if (!(Flags::kStencilOnly & fFlags)) {
+ if (!(OpFlags::kStencilOnly & fOpFlags)) {
fDoFillTriangleBuffer = true;
}
return true;
@@ -184,7 +189,7 @@
if (resolveLevelCounter) {
// Quadratics get converted to cubics before rendering.
resolveLevelCounter->countCubic(GrWangsFormula::quadratic_log2(
- kTessellationIntolerance, pts, xform));
+ kLinearizationIntolerance, pts, xform));
break;
}
++numCountedCurves;
@@ -193,7 +198,7 @@
middleOut.pushVertex(pts[3]);
if (resolveLevelCounter) {
resolveLevelCounter->countCubic(GrWangsFormula::cubic_log2(
- kTessellationIntolerance, pts, xform));
+ kLinearizationIntolerance, pts, xform));
break;
}
++numCountedCurves;
@@ -351,7 +356,7 @@
default:
continue;
case SkPathVerb::kQuad:
- level = GrWangsFormula::quadratic_log2(kTessellationIntolerance, pts, xform);
+ level = GrWangsFormula::quadratic_log2(kLinearizationIntolerance, pts, xform);
if (level == 0) {
continue;
}
@@ -359,7 +364,7 @@
quad2cubic(pts, instanceLocations[level]);
break;
case SkPathVerb::kCubic:
- level = GrWangsFormula::cubic_log2(kTessellationIntolerance, pts, xform);
+ level = GrWangsFormula::cubic_log2(kLinearizationIntolerance, pts, xform);
if (level == 0) {
continue;
}
@@ -483,7 +488,7 @@
void GrTessellatePathOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
this->drawStencilPass(flushState);
- if (!(Flags::kStencilOnly & fFlags)) {
+ if (!(OpFlags::kStencilOnly & fOpFlags)) {
this->drawCoverPass(flushState);
}
}
@@ -513,7 +518,7 @@
if (GrAAType::kNone != fAAType) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias;
}
- if (flushState->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) {
+ if (flushState->caps().wireframeSupport() && (OpFlags::kWireframe & fOpFlags)) {
initArgs.fInputFlags |= GrPipeline::InputFlags::kWireframe;
}
SkASSERT(SkPathFillType::kWinding == fPath.getFillType() ||
diff --git a/src/gpu/tessellate/GrTessellatePathOp.h b/src/gpu/tessellate/GrTessellatePathOp.h
index cb216a8..c53e194 100644
--- a/src/gpu/tessellate/GrTessellatePathOp.h
+++ b/src/gpu/tessellate/GrTessellatePathOp.h
@@ -9,6 +9,7 @@
#define GrTessellatePathOp_DEFINED
#include "src/gpu/ops/GrMeshDrawOp.h"
+#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
class GrAppliedHardClip;
class GrStencilPathShader;
@@ -18,20 +19,13 @@
// either GPU tessellation shaders or indirect draws. This Op doesn't apply analytic AA, so it
// requires a render target that supports either MSAA or mixed samples if AA is desired.
class GrTessellatePathOp : public GrDrawOp {
-public:
- enum class Flags {
- kNone = 0,
- kStencilOnly = (1 << 0),
- kWireframe = (1 << 1)
- };
-
private:
DEFINE_OP_CLASS_ID
GrTessellatePathOp(const SkMatrix& viewMatrix, const SkPath& path, GrPaint&& paint,
- GrAAType aaType, Flags flags = Flags::kNone)
+ GrAAType aaType, GrTessellationPathRenderer::OpFlags opFlags)
: GrDrawOp(ClassID())
- , fFlags(flags)
+ , fOpFlags(opFlags)
, fViewMatrix(viewMatrix)
, fPath(path)
, fAAType(aaType)
@@ -115,7 +109,7 @@
void drawStencilPass(GrOpFlushState*);
void drawCoverPass(GrOpFlushState*);
- const Flags fFlags;
+ const GrTessellationPathRenderer::OpFlags fOpFlags;
const SkMatrix fViewMatrix;
const SkPath fPath;
const GrAAType fAAType;
@@ -169,6 +163,4 @@
class TestingOnly_Benchmark;
};
-GR_MAKE_BITFIELD_CLASS_OPS(GrTessellatePathOp::Flags);
-
#endif
diff --git a/src/gpu/tessellate/GrTessellationPathRenderer.cpp b/src/gpu/tessellate/GrTessellationPathRenderer.cpp
index 34fcfc0..482a1ef 100644
--- a/src/gpu/tessellate/GrTessellationPathRenderer.cpp
+++ b/src/gpu/tessellate/GrTessellationPathRenderer.cpp
@@ -7,6 +7,7 @@
#include "src/gpu/tessellate/GrTessellationPathRenderer.h"
+#include "include/pathops/SkPathOps.h"
#include "src/core/SkIPoint16.h"
#include "src/core/SkPathPriv.h"
#include "src/gpu/GrClip.h"
@@ -18,6 +19,7 @@
#include "src/gpu/ops/GrFillRectOp.h"
#include "src/gpu/tessellate/GrDrawAtlasPathOp.h"
#include "src/gpu/tessellate/GrTessellatePathOp.h"
+#include "src/gpu/tessellate/GrWangsFormula.h"
constexpr static SkISize kAtlasInitialSize{512, 512};
constexpr static int kMaxAtlasSize = 2048;
@@ -30,9 +32,70 @@
// Ensure every path in the atlas falls in or below the 128px high rectanizer band.
constexpr static int kMaxAtlasPathHeight = 128;
-GrTessellationPathRenderer::GrTessellationPathRenderer(const GrCaps& caps) : fAtlas(
- GrColorType::kAlpha_8, GrDynamicAtlas::InternalMultisample::kYes, kAtlasInitialSize,
- std::min(kMaxAtlasSize, caps.maxPreferredRenderTargetSize()), caps, kAtlasAlgorithm) {
+GrTessellationPathRenderer::GrTessellationPathRenderer(const GrCaps& caps)
+ : fAtlas(GrColorType::kAlpha_8, GrDynamicAtlas::InternalMultisample::kYes,
+ kAtlasInitialSize, std::min(kMaxAtlasSize, caps.maxPreferredRenderTargetSize()),
+ caps, kAtlasAlgorithm) {
+ this->initAtlasFlags(*caps.shaderCaps());
+}
+
+void GrTessellationPathRenderer::initAtlasFlags(const GrShaderCaps& shaderCaps) {
+ fStencilAtlasFlags = OpFlags::kStencilOnly | OpFlags::kDisableHWTessellation;
+ fMaxAtlasPathWidth = fAtlas.maxAtlasSize() / 2;
+ // The atlas usually does better with hardware tessellation. If hardware tessellation is
+ // supported, we choose a max atlas path width that is guaranteed to never require more
+ // tessellation segments than are supported by the hardware.
+ if (!shaderCaps.tessellationSupport()) {
+ return;
+ }
+ // Since we limit the area of paths in the atlas to kMaxAtlasPathHeight^2, taller paths can't
+ // get very wide anyway. Find the tallest path whose width is limited by
+ // GrWangsFormula::worst_case_cubic() rather than the max area constraint, and use that for our
+ // max atlas path width.
+ //
+ // Solve the following equation for w:
+ //
+ // GrWangsFormula::worst_case_cubic(kLinearizationIntolerance, w, kMaxAtlasPathHeight^2 / w)
+ // == maxTessellationSegments
+ //
+ float k = GrWangsFormula::cubic_k(kLinearizationIntolerance);
+ float h = kMaxAtlasPathHeight;
+ float s = shaderCaps.maxTessellationSegments();
+ // Quadratic formula from Numerical Recipes in C:
+ //
+ // q = -1/2 [b + sign(b) sqrt(b*b - 4*a*c)]
+ // x1 = q/a
+ // x2 = c/q
+ //
+ // float a = 1; // 'a' is always 1 in our specific equation.
+ float b = -s*s*s*s / (4*k*k); // Always negative.
+ float c = h*h*h*h; // Always positive.
+ float det = b*b - 4*1*c;
+ if (det <= 0) {
+ // maxTessellationSegments is too small for any path whose area == kMaxAtlasPathHeight^2.
+ // (This is unexpected because the GL spec mandates a minimum of 64 segments.)
+ SkDebugf("WARNING: maxTessellationSegments seems too low. (%i)\n",
+ shaderCaps.maxTessellationSegments());
+ return;
+ }
+ float q = -.5f * (b - std::sqrt(det)); // Always positive.
+ // The two roots represent the width^2 and height^2 of the tallest rectangle that is limited by
+ // GrWangsFormula::worst_case_cubic().
+ float r0 = q; // Always positive.
+ float r1 = c/q; // Always positive.
+ float worstCaseWidth = std::sqrt(std::max(r0, r1));
+#ifdef SK_DEBUG
+ float worstCaseHeight = std::sqrt(std::min(r0, r1));
+ // Verify the above equation worked as expected. It should have found a width and height whose
+ // area == kMaxAtlasPathHeight^2.
+ SkASSERT(SkScalarNearlyEqual(worstCaseHeight * worstCaseWidth, h*h, 1));
+ // Verify GrWangsFormula::worst_case_cubic() still works as we expect. The worst case number of
+ // segments for this bounding box should be maxTessellationSegments.
+ SkASSERT(SkScalarNearlyEqual(GrWangsFormula::worst_case_cubic(
+ kLinearizationIntolerance, worstCaseWidth, worstCaseHeight), s, 1));
+#endif
+ fStencilAtlasFlags &= ~OpFlags::kDisableHWTessellation;
+ fMaxAtlasPathWidth = std::min(fMaxAtlasPathWidth, (int)worstCaseWidth);
}
GrPathRenderer::CanDrawPath GrTessellationPathRenderer::onCanDrawPath(
@@ -58,9 +121,14 @@
bool GrTessellationPathRenderer::onDrawPath(const DrawPathArgs& args) {
GrRenderTargetContext* renderTargetContext = args.fRenderTargetContext;
GrOpMemoryPool* pool = args.fContext->priv().opMemoryPool();
+ const GrShaderCaps& shaderCaps = *args.fContext->priv().caps()->shaderCaps();
+
SkPath path;
args.fShape->asPath(&path);
+ SkRect devBounds;
+ args.fViewMatrix->mapRect(&devBounds, path.getBounds());
+
// See if the path is small and simple enough to atlas instead of drawing directly.
//
// NOTE: The atlas uses alpha8 coverage even for msaa render targets. We could theoretically
@@ -69,8 +137,19 @@
SkIRect devIBounds;
SkIPoint16 locationInAtlas;
bool transposedInAtlas;
- if (this->tryAddPathToAtlas(*args.fContext->priv().caps(), *args.fViewMatrix, path,
+ if (this->tryAddPathToAtlas(*args.fContext->priv().caps(), *args.fViewMatrix, path, devBounds,
args.fAAType, &devIBounds, &locationInAtlas, &transposedInAtlas)) {
+#ifdef SK_DEBUG
+ // If using hardware tessellation in the atlas, make sure the max number of segments is
+ // sufficient for this path. fMaxAtlasPathWidth should have been tuned for this to always be
+ // the case.
+ if (!(fStencilAtlasFlags & OpFlags::kDisableHWTessellation)) {
+ int worstCaseNumSegments = GrWangsFormula::worst_case_cubic(kLinearizationIntolerance,
+ devIBounds.width(),
+ devIBounds.height());
+ SkASSERT(worstCaseNumSegments <= shaderCaps.maxTessellationSegments());
+ }
+#endif
auto op = pool->allocate<GrDrawAtlasPathOp>(
renderTargetContext->numSamples(), sk_ref_sp(fAtlas.textureProxy()),
devIBounds, locationInAtlas, transposedInAtlas, *args.fViewMatrix,
@@ -79,15 +158,58 @@
return true;
}
- auto op = pool->allocate<GrTessellatePathOp>(
- *args.fViewMatrix, path, std::move(args.fPaint), args.fAAType);
+ auto drawPathFlags = OpFlags::kNone;
+
+ // Find the worst-case log2 number of line segments that a curve in this path might need to be
+ // divided into.
+ int worstCaseResolveLevel = GrWangsFormula::worst_case_cubic_log2(kLinearizationIntolerance,
+ devBounds.width(),
+ devBounds.height());
+ if (worstCaseResolveLevel > kMaxResolveLevel) {
+ // The path is too large for our internal indirect draw shaders. Crop it to the viewport.
+ SkPath viewport;
+ viewport.addRect(SkRect::MakeIWH(renderTargetContext->width(),
+ renderTargetContext->height()).makeOutset(1, 1));
+ // Perform the crop in device space so it's a simple rect-path intersection.
+ path.transform(*args.fViewMatrix);
+ if (!Op(viewport, path, kIntersect_SkPathOp, &path)) {
+ // The crop can fail if the PathOps encounter NaN or infinities. Return true
+ // because drawing nothing is acceptable behavior for FP overflow.
+ return true;
+ }
+ // Transform the path back to its own local space.
+ SkMatrix inverse;
+ if (!args.fViewMatrix->invert(&inverse)) {
+ return true; // Singular view matrix. Nothing would have drawn anyway. Return true.
+ }
+ path.transform(inverse);
+ path.setIsVolatile(true);
+ args.fViewMatrix->mapRect(&devBounds, path.getBounds());
+ worstCaseResolveLevel = GrWangsFormula::worst_case_cubic_log2(kLinearizationIntolerance,
+ devBounds.width(),
+ devBounds.height());
+ // kMaxResolveLevel should be large enough to tessellate paths the size of any screen we
+ // might encounter.
+ SkASSERT(worstCaseResolveLevel <= kMaxResolveLevel);
+ }
+
+ if ((1 << worstCaseResolveLevel) > shaderCaps.maxTessellationSegments()) {
+ // The path is too large for hardware tessellation; a curve in this bounding box could
+ // potentially require more segments than are supported by the hardware. Fall back on
+ // indirect draws.
+ drawPathFlags |= OpFlags::kDisableHWTessellation;
+ }
+
+ auto op = pool->allocate<GrTessellatePathOp>(*args.fViewMatrix, path, std::move(args.fPaint),
+ args.fAAType, drawPathFlags);
renderTargetContext->addDrawOp(args.fClip, std::move(op));
return true;
}
bool GrTessellationPathRenderer::tryAddPathToAtlas(
- const GrCaps& caps, const SkMatrix& viewMatrix, const SkPath& path, GrAAType aaType,
- SkIRect* devIBounds, SkIPoint16* locationInAtlas, bool* transposedInAtlas) {
+ const GrCaps& caps, const SkMatrix& viewMatrix, const SkPath& path, const SkRect& devBounds,
+ GrAAType aaType, SkIRect* devIBounds, SkIPoint16* locationInAtlas,
+ bool* transposedInAtlas) {
if (!caps.multisampleDisableSupport() && GrAAType::kNone == aaType) {
return false;
}
@@ -98,13 +220,10 @@
return false;
}
- SkRect devBounds;
- viewMatrix.mapRect(&devBounds, path.getBounds());
- devBounds.roundOut(devIBounds);
-
// Transpose tall paths in the atlas. Since we limit ourselves to small-area paths, this
// guarantees that every atlas entry has a small height, which lends very well to efficient pow2
// atlas packing.
+ devBounds.roundOut(devIBounds);
int maxDimenstion = devIBounds->width();
int minDimension = devIBounds->height();
*transposedInAtlas = minDimension > maxDimenstion;
@@ -115,7 +234,7 @@
// Check if the path is too large for an atlas. Since we use "minDimension" for height in the
// atlas, limiting to kMaxAtlasPathHeight^2 pixels guarantees height <= kMaxAtlasPathHeight.
if (maxDimenstion * minDimension > kMaxAtlasPathHeight * kMaxAtlasPathHeight ||
- maxDimenstion > kMaxAtlasSize / 2) {
+ maxDimenstion > fMaxAtlasPathWidth) {
return false;
}
@@ -149,7 +268,7 @@
GrAAType aaType = (GrAA::kYes == args.fDoStencilMSAA) ? GrAAType::kMSAA : GrAAType::kNone;
auto op = args.fContext->priv().opMemoryPool()->allocate<GrTessellatePathOp>(
- *args.fViewMatrix, path, GrPaint(), aaType, GrTessellatePathOp::Flags::kStencilOnly);
+ *args.fViewMatrix, path, GrPaint(), aaType, OpFlags::kStencilOnly);
args.fRenderTargetContext->addDrawOp(args.fClip, std::move(op));
}
@@ -198,8 +317,7 @@
uberPath->setFillType(fillType);
GrAAType aaType = (antialias) ? GrAAType::kMSAA : GrAAType::kNone;
auto op = onFlushRP->opMemoryPool()->allocate<GrTessellatePathOp>(
- SkMatrix::I(), *uberPath, GrPaint(), aaType,
- GrTessellatePathOp::Flags::kStencilOnly);
+ SkMatrix::I(), *uberPath, GrPaint(), aaType, fStencilAtlasFlags);
rtc->addDrawOp(nullptr, std::move(op));
}
}
diff --git a/src/gpu/tessellate/GrTessellationPathRenderer.h b/src/gpu/tessellate/GrTessellationPathRenderer.h
index 28f2f1a..4035a0d 100644
--- a/src/gpu/tessellate/GrTessellationPathRenderer.h
+++ b/src/gpu/tessellate/GrTessellationPathRenderer.h
@@ -19,9 +19,32 @@
// target that supports either MSAA or mixed samples if AA is desired.
class GrTessellationPathRenderer : public GrPathRenderer, public GrOnFlushCallbackObject {
public:
- const char* name() const final { return "Tess"; }
+ // Don't allow linearized segments to be off by more than 1/4th of a pixel from the true curve.
+ constexpr static float kLinearizationIntolerance = 4;
+
+ // This is the maximum resolve level supported by our internal indirect draw shaders. (Indirect
+ // draws are an alternative to hardware tessellation, and we can use them when hardware support
+ // is lacking.)
+ //
+ // At a given resolveLevel, a curve gets linearized into 2^resolveLevel line segments. So the
+ // finest resolveLevel supported by our indirect draw shaders is 2^10 == 1024 line segments.
+ //
+ // 1024 line segments is enough resolution (with intolerance == 4) to guarantee we can render a
+ // 123575px x 123575px path. (See GrWangsFormula::worst_case_cubic.)
+ constexpr static int kMaxResolveLevel = 10;
+
+ // We send these flags to the internal tessellation Ops to control how a path gets rendered.
+ enum class OpFlags {
+ kNone = 0,
+ // Used when tessellation is not supported, or when a path will require more resolution than
+ // the max number of segments supported by the hardware.
+ kDisableHWTessellation = (1 << 0),
+ kStencilOnly = (1 << 1),
+ kWireframe = (1 << 2)
+ };
GrTessellationPathRenderer(const GrCaps&);
+ const char* name() const final { return "GrTessellationPathRenderer"; }
StencilSupport onGetStencilSupport(const GrStyledShape& shape) const override {
// TODO: Single-pass (e.g., convex) paths can have full support.
return kStencilOnly_StencilSupport;
@@ -33,19 +56,24 @@
int numOpsTaskIDs) override;
private:
+ void initAtlasFlags(const GrShaderCaps& shaderCaps);
SkPath* getAtlasUberPath(SkPathFillType fillType, bool antialias) {
int idx = (int)antialias << 1;
idx |= (int)fillType & 1;
return &fAtlasUberPaths[idx];
}
// Allocates space in fAtlas if the path is small and simple enough, and if there is room.
- bool tryAddPathToAtlas(const GrCaps&, const SkMatrix&, const SkPath&, GrAAType,
- SkIRect* devIBounds, SkIPoint16* locationInAtlas,
+ bool tryAddPathToAtlas(const GrCaps&, const SkMatrix&, const SkPath&, const SkRect& devBounds,
+ GrAAType, SkIRect* devIBounds, SkIPoint16* locationInAtlas,
bool* transposedInAtlas);
void renderAtlas(GrOnFlushResourceProvider*);
GrDynamicAtlas fAtlas;
+ OpFlags fStencilAtlasFlags;
+ int fMaxAtlasPathWidth;
SkPath fAtlasUberPaths[4]; // 2 fillTypes * 2 antialias modes.
};
+GR_MAKE_BITFIELD_CLASS_OPS(GrTessellationPathRenderer::OpFlags);
+
#endif
diff --git a/src/gpu/tessellate/GrWangsFormula.h b/src/gpu/tessellate/GrWangsFormula.h
index 1fa79c6..b9c3264 100644
--- a/src/gpu/tessellate/GrWangsFormula.h
+++ b/src/gpu/tessellate/GrWangsFormula.h
@@ -22,6 +22,11 @@
return std::sqrt(nn[0] + nn[1]);
}
+// Constant term for the quadratic formula.
+constexpr float quadratic_k(float intolerance) {
+ return .25f * intolerance;
+}
+
// Returns the minimum number of evenly spaced (in the parametric sense) line segments that the
// quadratic must be chopped into in order to guarantee all lines stay within a distance of
// "1/intolerance" pixels from the true curve.
@@ -29,10 +34,15 @@
Sk2f p0 = Sk2f::Load(pts);
Sk2f p1 = Sk2f::Load(pts + 1);
Sk2f p2 = Sk2f::Load(pts + 2);
- float k = intolerance * .25f;
+ float k = quadratic_k(intolerance);
return SkScalarSqrt(k * length(p0 - p1*2 + p2));
}
+// Constant term for the cubic formula.
+constexpr float cubic_k(float intolerance) {
+ return .75f * intolerance;
+}
+
// Returns the minimum number of evenly spaced (in the parametric sense) line segments that the
// cubic must be chopped into in order to guarantee all lines stay within a distance of
// "1/intolerance" pixels from the true curve.
@@ -41,11 +51,19 @@
Sk2f p1 = Sk2f::Load(pts + 1);
Sk2f p2 = Sk2f::Load(pts + 2);
Sk2f p3 = Sk2f::Load(pts + 3);
- float k = intolerance * .75f;
+ float k = cubic_k(intolerance);
return SkScalarSqrt(k * length(Sk2f::Max((p0 - p1*2 + p2).abs(),
(p1 - p2*2 + p3).abs())));
}
+// Returns the maximum number of line segments a cubic with the given device-space bounding box size
+// would ever need to be divided into. This is simply a special case of the cubic formula where we
+// maximize its value by placing control points on specific corners of the bounding box.
+SK_ALWAYS_INLINE static float worst_case_cubic(float intolerance, float devWidth, float devHeight) {
+ float k = cubic_k(intolerance);
+ return SkScalarSqrt(2*k * SkVector::Length(devWidth, devHeight)); // 2nd diff <= 2 * diagonal.
+}
+
// Returns the log2 of the provided value, were that value to be rounded up to the next power of 2.
// Returns 0 if value <= 0:
// Never returns a negative number, even if value is NaN.
@@ -63,6 +81,10 @@
return exp & ~(exp >> 31); // Return 0 for negative or denormalized floats, and exponents < 0.
}
+SK_ALWAYS_INLINE static int ceil_log2_sqrt_sqrt(float f) {
+ return (nextlog2(f) + 3) >> 2; // i.e., "ceil(log2(sqrt(sqrt(f))))"
+}
+
// Returns the minimum log2 number of evenly spaced (in the parametric sense) line segments that the
// transformed quadratic must be chopped into in order to guarantee all lines stay within a distance
// of "1/intolerance" pixels from the true curve.
@@ -74,9 +96,9 @@
Sk2f v = p0 + p1*-2 + p2;
v = vectorXform(v);
Sk2f vv = v*v;
- float k = intolerance * .25f;
+ float k = quadratic_k(intolerance);
float f = k*k * (vv[0] + vv[1]);
- return (nextlog2(f) + 3) >> 2; // ceil(log2(sqrt(sqrt(f))))
+ return ceil_log2_sqrt_sqrt(f);
}
// Returns the minimum log2 number of evenly spaced (in the parametric sense) line segments that the
@@ -91,9 +113,17 @@
v = vectorXform(v);
Sk4f vv = v*v;
vv = Sk4f::Max(vv, SkNx_shuffle<2,3,0,1>(vv));
- float k = intolerance * .75f;
+ float k = cubic_k(intolerance);
float f = k*k * (vv[0] + vv[1]);
- return (nextlog2(f) + 3) >> 2; // ceil(log2(sqrt(sqrt(f))))
+ return ceil_log2_sqrt_sqrt(f);
+}
+
+// Returns the maximum log2 number of line segments a cubic with the given device-space bounding box
+// size would ever need to be divided into. The argument passed below equals (2*k * diagonal)^2.
+SK_ALWAYS_INLINE static int worst_case_cubic_log2(float intolerance, float devWidth,
+ float devHeight) {
+ float k = cubic_k(intolerance);
+ return ceil_log2_sqrt_sqrt(4*k*k * (devWidth * devWidth + devHeight * devHeight));
}
} // namespace
diff --git a/tests/WangsFormulaTest.cpp b/tests/WangsFormulaTest.cpp
index a8e1c77..7d5161f 100644
--- a/tests/WangsFormulaTest.cpp
+++ b/tests/WangsFormulaTest.cpp
@@ -274,5 +274,38 @@
check_quadratic_log2_with_transform(pts, m);
});
});
+}
+DEF_TEST(WangsFormula_worst_case_cubic, r) {
+ {
+ SkPoint worstP[] = {{0,0}, {100,100}, {0,0}, {0,0}}; // p1 on the corner opposite p0/p2/p3.
+ REPORTER_ASSERT(r, GrWangsFormula::worst_case_cubic(kIntolerance, 100, 100) ==
+ GrWangsFormula::cubic(kIntolerance, worstP));
+ REPORTER_ASSERT(r, GrWangsFormula::worst_case_cubic_log2(kIntolerance, 100, 100) ==
+ GrWangsFormula::cubic_log2(kIntolerance, worstP));
+ }
+ {
+ SkPoint worstP[] = {{100,100}, {100,100}, {200,200}, {100,100}}; // p2 opposite p0/p1/p3.
+ REPORTER_ASSERT(r, GrWangsFormula::worst_case_cubic(kIntolerance, 100, 100) ==
+ GrWangsFormula::cubic(kIntolerance, worstP));
+ REPORTER_ASSERT(r, GrWangsFormula::worst_case_cubic_log2(kIntolerance, 100, 100) ==
+ GrWangsFormula::cubic_log2(kIntolerance, worstP));
+ }
+ auto check_worst_case_cubic = [&](const SkPoint* pts) {
+ SkRect bbox;
+ bbox.setBoundsNoCheck(pts, 4);
+ float worst = GrWangsFormula::worst_case_cubic(kIntolerance, bbox.width(), bbox.height());
+ int worst_log2 = GrWangsFormula::worst_case_cubic_log2(kIntolerance, bbox.width(),
+ bbox.height());
+ float actual = GrWangsFormula::cubic(kIntolerance, pts);
+ REPORTER_ASSERT(r, worst >= actual); // The bounding-box bound must never undershoot.
+ REPORTER_ASSERT(r, std::ceil(std::log2(std::max(1.f, worst))) == worst_log2);
+ SkASSERT(std::ceil(std::log2(std::max(1.f, worst))) == worst_log2); // Repeats the line above.
+ };
+ SkRandom rand;
+ for (int i = 0; i < 100; ++i) {
+ for_random_beziers(4, &rand, [&](const SkPoint pts[]) {
+ check_worst_case_cubic(pts);
+ });
+ }
}