Update path tessellation shaders to use GrWangsFormula
Bug: skia:10419
Change-Id: I34114a7b034f51bc485ae2ddb8ae85addad91f93
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/410496
Reviewed-by: Tyler Denniston <tdenniston@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/geometry/GrWangsFormula.h b/src/gpu/geometry/GrWangsFormula.h
index e31f9b3..7800a35 100644
--- a/src/gpu/geometry/GrWangsFormula.h
+++ b/src/gpu/geometry/GrWangsFormula.h
@@ -181,23 +181,24 @@
}
// Emits an SKSL function that calculates Wang's formula for the given set of 4 points. The points
-// represent a cubic, or, if 'hasConics' is true and W >= 0, a conic defined by the first 3 points.
+// represent a cubic, or, if 'hasConics' is true and w >= 0, a conic defined by the first 3 points.
inline static SkString as_sksl(bool hasConics) {
SkString code;
code.appendf(R"(
float length_pow2(float2 v) {
return dot(v, v);
}
- float wangs_formula(float parametricPrecision, float4x2 P, float W) {
+ float wangs_formula(float parametricPrecision, float2 p0, float2 p1, float2 p2, float2 p3,
+ float w) {
const float CUBIC_TERM_POW2 = %f;
- float l0 = length_pow2(fma(float2(-2), P[1], P[2]) + P[0]);
- float l1 = length_pow2(fma(float2(-2), P[2], P[3]) + P[1]);
+ float l0 = length_pow2(fma(float2(-2), p1, p2) + p0);
+ float l1 = length_pow2(fma(float2(-2), p2, p3) + p1);
float m = CUBIC_TERM_POW2 * max(l0, l1);)", length_term_pow2<3>(1));
if (hasConics) {
// FIXME: Use the better formula from GrWangsFormula::conic().
code.appendf(R"(
const float QUAD_TERM_POW2 = %f;
- m = (W >= 0.0) ? QUAD_TERM_POW2 * l0 : m;)", length_term_pow2<2>(1));
+ m = (w >= 0.0) ? QUAD_TERM_POW2 * l0 : m;)", length_term_pow2<2>(1));
}
code.append(R"(
return max(ceil(sqrt(parametricPrecision * sqrt(m))), 1.0);
diff --git a/src/gpu/tessellate/GrStencilPathShader.cpp b/src/gpu/tessellate/GrStencilPathShader.cpp
index 0ff4619..ec72587 100644
--- a/src/gpu/tessellate/GrStencilPathShader.cpp
+++ b/src/gpu/tessellate/GrStencilPathShader.cpp
@@ -7,29 +7,16 @@
#include "src/gpu/tessellate/GrStencilPathShader.h"
+#include "src/gpu/geometry/GrWangsFormula.h"
#include "src/gpu/glsl/GrGLSLGeometryProcessor.h"
#include "src/gpu/glsl/GrGLSLVarying.h"
#include "src/gpu/glsl/GrGLSLVertexGeoBuilder.h"
-// Wang's formula for cubics (1985) gives us the number of evenly spaced (in the
-// parametric sense) line segments that are guaranteed to be within a distance of
-// "MAX_LINEARIZATION_ERROR" from the actual curve.
-constexpr static char kWangsFormulaCubicFn[] = R"(
-#define MAX_LINEARIZATION_ERROR 0.25 // 1/4 pixel
-float length_pow2(vec2 v) {
- return dot(v, v);
-}
-float wangs_formula_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3) {
- float k = (3.0 * 2.0) / (8.0 * MAX_LINEARIZATION_ERROR);
- float m = max(length_pow2(-2.0*p1 + p2 + p0),
- length_pow2(-2.0*p2 + p3 + p1));
- return max(1.0, ceil(sqrt(k * sqrt(m))));
-})";
-
constexpr static char kSkSLTypeDefs[] = R"(
#define float4x3 mat4x3
-#define float3 vec3
#define float2 vec2
+#define float3 vec3
+#define float4 vec4
)";
// Converts a 4-point input patch into the rational cubic it intended to represent.
@@ -86,8 +73,8 @@
if (!shader.willUseTessellationShaders()) { // This is the case for the triangle shader.
gpArgs->fPositionVar = vertexPos;
} else {
- v->declareGlobal(GrShaderVar("vsPt", kFloat2_GrSLType, GrShaderVar::TypeModifier::Out));
- v->codeAppendf("vsPt = %s;", vertexPos.c_str());
+ v->declareGlobal(GrShaderVar("P", kFloat2_GrSLType, GrShaderVar::TypeModifier::Out));
+ v->codeAppendf("P = %s;", vertexPos.c_str());
}
// The fragment shader is normally disabled, but output fully opaque white.
@@ -118,53 +105,68 @@
const GrGLSLUniformHandler&,
const GrShaderCaps&) const override {
SkString code(versionAndExtensionDecls);
- code.append(kWangsFormulaCubicFn);
+ code.appendf(R"(
+ #define PRECISION %f)", GrTessellationPathRenderer::kLinearizationPrecision);
code.append(kSkSLTypeDefs);
+ code.append(GrWangsFormula::as_sksl(true/*hasConics*/));
code.append(kUnpackRationalCubicFn);
code.append(R"(
layout(vertices = 1) out;
- in vec2 vsPt[];
- out vec4 X[];
- out vec4 Y[];
- out float w[];
+ in vec2 P[];
+ patch out mat4x2 rationalCubicXY;
+ patch out float rationalCubicW;
void main() {
- mat4x3 P = unpack_rational_cubic(vsPt[0], vsPt[1], vsPt[2], vsPt[3]);
+ vec2 p0=P[0], p1w=P[1], p2=P[2], p3=P[3];
+ float w = -1;
+ if (isinf(p3.y)) {
+ // This patch is actually a conic. Project to homogeneous space.
+ w = p3.x;
+ p1w *= w;
+ }
- // Chop the curve at T=1/2. Here we take advantage of the fact that a uniform scalar
- // has no effect on homogeneous coordinates in order to eval quickly at .5:
- //
- // mix(p0, p1, .5) / mix(w0, w1, .5)
- // == ((p0 + p1) * .5) / ((w0 + w1) * .5)
- // == (p0 + p1) / (w0 + w1)
- //
- vec3 ab = P[0] + P[1];
- vec3 bc = P[1] + P[2];
- vec3 cd = P[2] + P[3];
- vec3 abc = ab + bc;
- vec3 bcd = bc + cd;
- vec3 abcd = abc + bcd;
+ // Chop the curve at T=1/2.
+ vec2 ab = (p0 + p1w) * .5;
+ vec2 bc = (p1w + p2) * .5;
+ vec2 cd = (p2 + p3) * .5;
+ vec2 abc = (ab + bc) * .5;
+ vec2 bcd = (bc + cd) * .5;
+ vec2 abcd = (abc + bcd) * .5;
- // Calculate how many triangles we need to linearize each half of the curve. We
- // simply call Wang's formula for integral cubics with the down-projected points.
- // This appears to be an upper bound on what the actual number of subdivisions would
- // have been.
- float w0 = wangs_formula_cubic(P[0].xy, ab.xy/ab.z, abc.xy/abc.z, abcd.xy/abcd.z);
- float w1 = wangs_formula_cubic(abcd.xy/abcd.z, bcd.xy/bcd.z, cd.xy/cd.z, P[3].xy);
+ float n0, n1;
+ if (w < 0) {
+ // The patch is a cubic. Calculate how many segments are needed to linearize
+ // each half of the curve.
+ n0 = wangs_formula(PRECISION, p0, ab, abc, abcd, -1); // w<0 means cubic.
+ n1 = wangs_formula(PRECISION, abcd, bcd, cd, p3, -1);
+ rationalCubicXY = mat4x2(p0, p1w, p2, p3); // p1w == p1 because w == 1.
+ rationalCubicW = 1;
+ } else {
+ // The patch is a conic. Unproject p0..5. w1 == w2 == w3 when chopping at .5.
+ // (See SkConic::chopAt().)
+ float r = 2.0 / (1.0 + w);
+ ab *= r, bc *= r, abc *= r;
+ // Put in "standard form" where w0 == w2 == w4 == 1.
+ float w_ = inversesqrt(r); // Both halves have the same w' when chopping at .5.
+ // Calculate how many segments are needed to linearize each half of the curve.
+ n0 = wangs_formula(PRECISION, p0, ab, abc, float2(0), w_);
+ n1 = wangs_formula(PRECISION, abc, bc, p2, float2(0), w_);
+ // Covert the conic to a rational cubic in projected form.
+ rationalCubicXY = mat4x2(p0,
+ mix(float4(p0,p2), p1w.xyxy, 2.0/3.0),
+ p2);
+ rationalCubicW = fma(w, 2.0/3.0, 1.0/3.0);
+ }
- gl_TessLevelOuter[0] = w1;
+ gl_TessLevelOuter[0] = n1;
gl_TessLevelOuter[1] = 1.0;
- gl_TessLevelOuter[2] = w0;
+ gl_TessLevelOuter[2] = n0;
- // Changing the inner level to 1 when w0 == w1 == 1 collapses the entire patch to a
+ // Changing the inner level to 1 when n0 == n1 == 1 collapses the entire patch to a
// single triangle. Otherwise, we need an inner level of 2 so our curve triangles
// have an interior point to originate from.
- gl_TessLevelInner[0] = min(max(w0, w1), 2.0);
-
- X[gl_InvocationID /*== 0*/] = vec4(P[0].x, P[1].x, P[2].x, P[3].x);
- Y[gl_InvocationID /*== 0*/] = vec4(P[0].y, P[1].y, P[2].y, P[3].y);
- w[gl_InvocationID /*== 0*/] = P[1].z;
+ gl_TessLevelInner[0] = min(max(n0, n1), 2.0);
})");
return code;
@@ -182,9 +184,8 @@
uniform vec4 sk_RTAdjust;
- in vec4 X[];
- in vec4 Y[];
- in float w[];
+ patch in mat4x2 rationalCubicXY;
+ patch in float rationalCubicW;
void main() {
// Locate our parametric point of interest. T ramps from [0..1/2] on the left edge
@@ -193,7 +194,10 @@
// the interior vertex, the below fma() works in all 3 scenarios.
float T = fma(.5, gl_TessCoord.y, gl_TessCoord.z);
- mat4x3 P = transpose(mat3x4(X[0], Y[0], 1,w[0],w[0],1));
+ mat4x3 P = mat4x3(rationalCubicXY[0], 1,
+ rationalCubicXY[1], rationalCubicW,
+ rationalCubicXY[2], rationalCubicW,
+ rationalCubicXY[3], 1);
vec2 vertexpos = eval_rational_cubic(P, T);
if (all(notEqual(gl_TessCoord.xz, vec2(0)))) {
// We are the interior point of the patch; center it inside [C(0), C(.5), C(1)].
@@ -217,43 +221,47 @@
const GrGLSLUniformHandler&,
const GrShaderCaps&) const override {
SkString code(versionAndExtensionDecls);
- code.append(kWangsFormulaCubicFn);
+ code.appendf(R"(
+ #define PRECISION %f)", GrTessellationPathRenderer::kLinearizationPrecision);
code.append(kSkSLTypeDefs);
+ code.append(GrWangsFormula::as_sksl(true/*hasConics*/));
code.append(kUnpackRationalCubicFn);
code.append(R"(
layout(vertices = 1) out;
- in vec2 vsPt[];
- out vec4 X[];
- out vec4 Y[];
- out float w[];
- out vec2 fanpoint[];
+ in vec2 P[];
+ patch out mat4x2 rationalCubicXY;
+ patch out float rationalCubicW;
+ patch out vec2 fanpoint;
void main() {
- mat4x3 P = unpack_rational_cubic(vsPt[0], vsPt[1], vsPt[2], vsPt[3]);
+ // Figure out how many segments to divide the curve into.
+ float w = isinf(P[3].y) ? P[3].x : -1; // w<0 means cubic.
+ float n = wangs_formula(PRECISION, P[0], P[1], P[2], P[3], w);
- // Figure out how many segments to divide the curve into. To do this we simply call
- // Wang's formula for integral cubics with the down-projected points. This appears
- // to be an upper bound on what the actual number of subdivisions would have been.
- float num_segments = wangs_formula_cubic(P[0].xy, P[1].xy/P[1].z, P[2].xy/P[2].z,
- P[3].xy);
-
- // Tessellate the first side of the patch into num_segments triangles.
- gl_TessLevelOuter[0] = num_segments;
+ // Tessellate the first side of the patch into n triangles.
+ gl_TessLevelOuter[0] = n;
// Leave the other two sides of the patch as single segments.
gl_TessLevelOuter[1] = 1.0;
gl_TessLevelOuter[2] = 1.0;
- // Changing the inner level to 1 when num_segments == 1 collapses the entire
+ // Changing the inner level to 1 when n == 1 collapses the entire
// patch to a single triangle. Otherwise, we need an inner level of 2 so our curve
// triangles have an interior point to originate from.
- gl_TessLevelInner[0] = min(num_segments, 2.0);
+ gl_TessLevelInner[0] = min(n, 2.0);
- X[gl_InvocationID /*== 0*/] = vec4(P[0].x, P[1].x, P[2].x, P[3].x);
- Y[gl_InvocationID /*== 0*/] = vec4(P[0].y, P[1].y, P[2].y, P[3].y);
- w[gl_InvocationID /*== 0*/] = P[1].z;
- fanpoint[gl_InvocationID /*== 0*/] = vsPt[4];
+ if (w < 0) {
+ rationalCubicXY = mat4x2(P[0], P[1], P[2], P[3]);
+ rationalCubicW = 1;
+ } else {
+ // Convert the conic to a rational cubic in projected form.
+ rationalCubicXY = mat4x2(P[0],
+ mix(vec4(P[0], P[2]), (P[1] * w).xyxy, 2.0/3.0),
+ P[2]);
+ rationalCubicW = fma(w, 2.0/3.0, 1.0/3.0);
+ }
+ fanpoint = P[4];
})");
return code;
@@ -271,10 +279,9 @@
uniform vec4 sk_RTAdjust;
- in vec4 X[];
- in vec4 Y[];
- in float w[];
- in vec2 fanpoint[];
+ patch in mat4x2 rationalCubicXY;
+ patch in float rationalCubicW;
+ patch in vec2 fanpoint[];
void main() {
// Locate our parametric point of interest. It is equal to the barycentric
@@ -283,7 +290,10 @@
// NOTE: We are on the tessellated edge when the barycentric x-coordinate == 0.
float T = (gl_TessCoord.x == 0.0) ? gl_TessCoord.y : 0.5;
- mat4x3 P = transpose(mat3x4(X[0], Y[0], 1,w[0],w[0],1));
+ mat4x3 P = mat4x3(rationalCubicXY[0], 1,
+ rationalCubicXY[1], rationalCubicW,
+ rationalCubicXY[2], rationalCubicW,
+ rationalCubicXY[3], 1);
vec2 vertexpos = eval_rational_cubic(P, T);
if (gl_TessCoord.x == 1.0) {
diff --git a/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp b/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
index 285e87c..0374666 100644
--- a/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
+++ b/src/gpu/tessellate/GrStrokeInstancedShaderImpl.cpp
@@ -95,11 +95,11 @@
args.fVertBuilder->codeAppend(R"(
float4x2 P = float4x2(pts01Attr, pts23Attr);
float2 lastControlPoint = argsAttr.xy;
- float W = -1; // W<0 means the curve is an integral cubic.)");
+ float w = -1; // w<0 means the curve is an integral cubic.)");
if (shader.hasConics()) {
args.fVertBuilder->codeAppend(R"(
if (isinf(P[3].y)) {
- W = P[3].x; // The curve is actually a conic.
+ w = P[3].x; // The curve is actually a conic.
P[3] = P[2]; // Setting p3 equal to p2 works for the remaining rotational logic.
})");
}
@@ -113,7 +113,8 @@
args.fVertBuilder->codeAppend(R"(
// Find how many parametric segments this stroke requires.
- float numParametricSegments = min(wangs_formula(PARAMETRIC_PRECISION, P, W),
+ float numParametricSegments = min(wangs_formula(PARAMETRIC_PRECISION,
+ P[0], P[1], P[2], P[3], w),
float(1 << MAX_PARAMETRIC_SEGMENTS_LOG2));
if (P[0] == P[1] && P[2] == P[3]) {
// This is how we describe lines, but Wang's formula does not return 1 in this case.
diff --git a/src/gpu/tessellate/GrStrokeShader.cpp b/src/gpu/tessellate/GrStrokeShader.cpp
index 1368e25..661b5e0 100644
--- a/src/gpu/tessellate/GrStrokeShader.cpp
+++ b/src/gpu/tessellate/GrStrokeShader.cpp
@@ -74,7 +74,7 @@
//
// // Values provided by either uniforms or attribs.
// float4x2 P;
- // float W;
+ // float w;
// float STROKE_RADIUS;
// float 2x2 AFFINE_MATRIX;
// float2 TRANSLATE;
@@ -105,17 +105,17 @@
float2 D = P[3] - P[0];)");
if (hasConics) {
code->append(R"(
- if (W >= 0.0) {
+ if (w >= 0.0) {
// P0..P2 represent a conic and P3==P2. The derivative of a conic has a cumbersome
// order-4 denominator. However, this isn't necessary if we are only interested in a
// vector in the same *direction* as a given tangent line. Since the denominator scales
// dx and dy uniformly, we can throw it out completely after evaluating the derivative
// with the standard quotient rule. This leaves us with a simpler quadratic function
// that we use to find a tangent.
- C *= W;
+ C *= w;
B = .5*D - C;
- A = (W - 1.0) * D;
- P[1] *= W;
+ A = (w - 1.0) * D;
+ P[1] *= w;
} else {)");
} else {
code->append(R"(
@@ -223,13 +223,13 @@
if (hasConics) {
code->append(R"(
// Evaluate the conic weights at T.
- float u = unchecked_mix(1.0, W, T);
- float v = unchecked_mix(W, 1.0, T);
+ float u = unchecked_mix(1.0, w, T);
+ float v = unchecked_mix(w, 1.0, T);
float uv = unchecked_mix(u, v, T);)");
}
code->appendf(R"(
- strokeCoord =%s abcd;)", (hasConics) ? " (W >= 0.0) ? abc/uv :" : "");
+ strokeCoord =%s abcd;)", (hasConics) ? " (w >= 0.0) ? abc/uv :" : "");
code->append(R"(
// If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
@@ -237,7 +237,7 @@
// tangent.)
if (T != radialT) {)");
code->appendf(R"(
- tangent =%s bcd - abc;)", (hasConics) ? " (W >= 0.0) ? bc*u - ab*v :" : "");
+ tangent =%s bcd - abc;)", (hasConics) ? " (w >= 0.0) ? bc*u - ab*v :" : "");
code->append(R"(
}
} else {
diff --git a/src/gpu/tessellate/GrStrokeShader.h b/src/gpu/tessellate/GrStrokeShader.h
index c82fd1c..3677648 100644
--- a/src/gpu/tessellate/GrStrokeShader.h
+++ b/src/gpu/tessellate/GrStrokeShader.h
@@ -246,7 +246,7 @@
//
// // Values provided by either uniforms or attribs.
// float4x2 P;
- // float W;
+ // float w;
// float STROKE_RADIUS;
// float 2x2 AFFINE_MATRIX;
// float2 TRANSLATE;
diff --git a/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp b/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
index 8600ff4..f7d2a6f 100644
--- a/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellationShaderImpl.cpp
@@ -435,7 +435,8 @@
// Calculate the number of parametric segments. The final tessellated strip will be a
// composition of these parametric segments as well as radial segments.
float w = isinf(P[3].y) ? P[3].x : -1.0; // w<0 means the curve is an integral cubic.
- float numParametricSegments = wangs_formula(PARAMETRIC_PRECISION, P, w);
+ float numParametricSegments = wangs_formula(PARAMETRIC_PRECISION,
+ P[0], P[1], P[2], P[3], w);
if (P[0] == P[1] && P[2] == P[3]) {
// This is how the patch builder articulates lineTos but Wang's formula returns
// >>1 segment in this scenario. Assign 1 parametric segment.
@@ -636,12 +637,12 @@
float radsPerSegment = tessellationArgs.z;
float2 tan1 = tcsEndPtEndTan.zw;
bool isFinalEdge = (gl_TessCoord.x == 1);
- float W = -1.0; // W<0 means the curve is an integral cubic.)");
+ float w = -1.0; // w<0 means the curve is an integral cubic.)");
if (shader.hasConics()) {
code.append(R"(
if (isinf(P[3].y)) {
- W = P[3].x; // The curve is actually a conic.
+ w = P[3].x; // The curve is actually a conic.
P[3] = P[2]; // Setting p3 equal to p2 works for the remaining rotational logic.
})");
}