Extract a GrCCPathParser class that is decoupled from atlas ops
Allows coverage counts ultimately to be drawn either to an atlas or
directly to the framebuffer.
Bug: skia:
Change-Id: I6cc07fce562c223381b89586d19ae98298bafe4d
Reviewed-on: https://skia-review.googlesource.com/96083
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/ccpr/GrCCPathParser.cpp b/src/gpu/ccpr/GrCCPathParser.cpp
new file mode 100644
index 0000000..03fb3d9
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPathParser.cpp
@@ -0,0 +1,454 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPathParser.h"
+
+#include "GrCaps.h"
+#include "GrGpuCommandBuffer.h"
+#include "GrOnFlushResourceProvider.h"
+#include "GrOpFlushState.h"
+#include "SkMathPriv.h"
+#include "SkPath.h"
+#include "SkPathPriv.h"
+#include "SkPoint.h"
+#include "ccpr/GrCCGeometry.h"
+
+using TriangleInstance = GrCCCoverageProcessor::TriangleInstance; // Instance type written for triangles and quadratics in finalize().
+using CubicInstance = GrCCCoverageProcessor::CubicInstance; // Instance type written for cubics in finalize().
+
+GrCCPathParser::GrCCPathParser(int maxTotalPaths, int maxPathPoints, int numSkPoints,
+ int numSkVerbs) // All four counts are forwarded to the members initialized below.
+ : fLocalDevPtsBuffer(maxPathPoints + 1) // Overallocate by one point to accommodate for
+ // overflow with Sk4f. (See parsePath.)
+ , fGeometry(numSkPoints, numSkVerbs)
+ , fPathsInfo(maxTotalPaths)
+ , fScissorSubBatches(maxTotalPaths)
+ , fTotalPrimitiveCounts{PrimitiveTallies(), PrimitiveTallies()} {
+ // Batches decide what to draw by looking where the previous one ended. Define initial batches
+ // that "end" at the beginning of the data. These will not be drawn, but will only be read by
+ // the first actual batch.
+ fScissorSubBatches.push_back() = {PrimitiveTallies(), SkIRect::MakeEmpty()};
+ fCoverageCountBatches.push_back() = {PrimitiveTallies(), fScissorSubBatches.count()}; // Ends just past the sentinel sub-batch pushed above.
+}
+
+void GrCCPathParser::parsePath(const SkMatrix& m, const SkPath& path, SkRect* devBounds,
+ SkRect* devBounds45) { // Maps the path's points through m, outputs both bounding boxes, then parses the mapped points.
+ const SkPoint* pts = SkPathPriv::PointData(path);
+ int numPts = path.countPoints();
+ SkASSERT(numPts + 1 <= fLocalDevPtsBuffer.count()); // Each Sk4f store below spills one point past index i.
+
+ if (!numPts) {
+ devBounds->setEmpty();
+ devBounds45->setEmpty();
+ this->parsePath(path, nullptr); // No points were mapped, so there are none to hand off.
+ return;
+ }
+
+ // m45 transforms path points into "45 degree" device space. A bounding box in this space gives
+ // the circumscribing octagon's diagonals. We could use SK_ScalarRoot2Over2, but an orthonormal
+ // transform is not necessary as long as the shader uses the correct inverse.
+ SkMatrix m45;
+ m45.setSinCos(1, 1);
+ m45.preConcat(m);
+
+ // X,Y,T are two parallel view matrices that accumulate two bounding boxes as they map points:
+ // device-space bounds and "45 degree" device-space bounds (| 1 -1 | * devCoords).
+ // | 1 1 |
+ Sk4f X = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());
+ Sk4f Y = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());
+ Sk4f T = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());
+
+ // Map the path's points to device space and accumulate bounding boxes.
+ Sk4f devPt = SkNx_fma(Y, Sk4f(pts[0].y()), T); // Presumably Y * y + T (fused multiply-add) -- confirm against SkNx_fma.
+ devPt = SkNx_fma(X, Sk4f(pts[0].x()), devPt); // Then the X * x term is accumulated on top.
+ Sk4f topLeft = devPt; // Both bounds start out as the first mapped point.
+ Sk4f bottomRight = devPt;
+
+ // Store all 4 values [dev.x, dev.y, dev45.x, dev45.y]. We are only interested in the first two,
+ // and will overwrite [dev45.x, dev45.y] with the next point. This is why the dst buffer must
+ // be at least one larger than the number of points.
+ devPt.store(&fLocalDevPtsBuffer[0]);
+
+ for (int i = 1; i < numPts; ++i) {
+ devPt = SkNx_fma(Y, Sk4f(pts[i].y()), T);
+ devPt = SkNx_fma(X, Sk4f(pts[i].x()), devPt);
+ topLeft = Sk4f::Min(topLeft, devPt);
+ bottomRight = Sk4f::Max(bottomRight, devPt);
+ devPt.store(&fLocalDevPtsBuffer[i]);
+ }
+
+ SkPoint topLeftPts[2], bottomRightPts[2]; // [0] receives lanes 0-1 (device space), [1] receives lanes 2-3 ("45 degree" space).
+ topLeft.store(topLeftPts);
+ bottomRight.store(bottomRightPts);
+ devBounds->setLTRB(topLeftPts[0].x(), topLeftPts[0].y(), bottomRightPts[0].x(),
+ bottomRightPts[0].y());
+ devBounds45->setLTRB(topLeftPts[1].x(), topLeftPts[1].y(), bottomRightPts[1].x(),
+ bottomRightPts[1].y());
+
+ this->parsePath(path, fLocalDevPtsBuffer.get()); // Verbs come from the path; points come from the mapped buffer.
+}
+
+void GrCCPathParser::parseDeviceSpacePath(const SkPath& deviceSpacePath) { // Parses a path using its own points, unmodified, as the device-space points.
+ this->parsePath(deviceSpacePath, SkPathPriv::PointData(deviceSpacePath));
+}
+
+void GrCCPathParser::parsePath(const SkPath& path, const SkPoint* deviceSpacePts) { // Feeds the path's verbs, paired with deviceSpacePts, into fGeometry.
+ SkASSERT(!fInstanceBuffer); // Can't call after finalize().
+ SkASSERT(!fParsingPath); // Call saveParsedPath() or discardParsedPath() for the last one first.
+ SkDEBUGCODE(fParsingPath = true);
+ SkASSERT(path.isEmpty() || deviceSpacePts);
+
+ fCurrPathPointsIdx = fGeometry.points().count(); // Recorded so discardParsedPath() can roll fGeometry back.
+ fCurrPathVerbsIdx = fGeometry.verbs().count();
+ fCurrPathPrimitiveCounts = PrimitiveTallies(); // Fresh tally for this path only.
+
+ fGeometry.beginPath();
+
+ if (path.isEmpty()) {
+ return; // No verbs to walk.
+ }
+
+ int ptsIdx = 0; // Index of the next unread entry in deviceSpacePts.
+ bool insideContour = false;
+
+ for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
+ switch (verb) {
+ case SkPath::kMove_Verb:
+ this->endContourIfNeeded(insideContour); // A move implicitly ends any contour still open.
+ fGeometry.beginContour(deviceSpacePts[ptsIdx]);
+ ++ptsIdx;
+ insideContour = true;
+ continue;
+ case SkPath::kClose_Verb:
+ this->endContourIfNeeded(insideContour);
+ insideContour = false;
+ continue;
+ case SkPath::kLine_Verb:
+ fGeometry.lineTo(deviceSpacePts[ptsIdx]);
+ ++ptsIdx;
+ continue;
+ case SkPath::kQuad_Verb:
+ fGeometry.quadraticTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1]);
+ ptsIdx += 2; // Consumes two points.
+ continue;
+ case SkPath::kCubic_Verb:
+ fGeometry.cubicTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1],
+ deviceSpacePts[ptsIdx + 2]);
+ ptsIdx += 3; // Consumes three points.
+ continue;
+ case SkPath::kConic_Verb:
+ SK_ABORT("Conics are not supported.");
+ default:
+ SK_ABORT("Unexpected path verb.");
+ }
+ }
+
+ this->endContourIfNeeded(insideContour); // The final contour may not have been explicitly closed.
+}
+
+void GrCCPathParser::endContourIfNeeded(bool insideContour) { // Ends the open contour, if there is one, and tallies what it produced.
+ if (insideContour) {
+ fCurrPathPrimitiveCounts += fGeometry.endContour(); // endContour()'s return value is added to the current path's tally.
+ }
+}
+
+void GrCCPathParser::saveParsedPath(ScissorMode scissorMode, const SkIRect& clippedDevIBounds,
+ int16_t atlasOffsetX, int16_t atlasOffsetY) { // Commits the path from the preceding parsePath() call.
+ SkASSERT(fParsingPath);
+
+ fPathsInfo.push_back() = {scissorMode, atlasOffsetX, atlasOffsetY}; // Read back per path by finalize().
+ fTotalPrimitiveCounts[(int)scissorMode] += fCurrPathPrimitiveCounts; // Running total for this scissor mode.
+
+ if (ScissorMode::kScissored == scissorMode) {
+ fScissorSubBatches.push_back() = {fTotalPrimitiveCounts[(int)ScissorMode::kScissored], // Scissored tallies as of the end of this path.
+ clippedDevIBounds.makeOffset(atlasOffsetX, atlasOffsetY)}; // Device bounds shifted by the atlas offset.
+ }
+
+ SkDEBUGCODE(fParsingPath = false);
+}
+
+void GrCCPathParser::discardParsedPath() { // Drops the path from the preceding parsePath() call.
+ SkASSERT(fParsingPath);
+ fGeometry.resize_back(fCurrPathPointsIdx, fCurrPathVerbsIdx); // Back to the counts recorded when parsing began.
+ SkDEBUGCODE(fParsingPath = false);
+}
+
+GrCCPathParser::CoverageCountBatchID GrCCPathParser::closeCurrentBatch() { // Ends the current batch and returns its index in fCoverageCountBatches.
+ SkASSERT(!fInstanceBuffer);
+ SkASSERT(!fCoverageCountBatches.empty());
+
+ int maxMeshes = 1 + fScissorSubBatches.count() - // One non-scissored mesh, plus one per sub-batch added since the previous batch ended.
+ fCoverageCountBatches.back().fEndScissorSubBatchIdx;
+ fMaxMeshesPerDraw = SkTMax(fMaxMeshesPerDraw, maxMeshes);
+
+ fCoverageCountBatches.push_back() = {
+ fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored],
+ fScissorSubBatches.count()
+ };
+ return fCoverageCountBatches.count() - 1; // Index of the entry just pushed.
+}
+
+// Emits a contour's triangle fan.
+//
+// Classic Redbook fanning would be the triangles: [0 1 2], [0 2 3], ..., [0 n-2 n-1].
+//
+// This function emits the triangle: [0 n/3 n*2/3], and then recurses on all three sides. The
+// advantage to this approach is that for a convex-ish contour, it generates larger triangles.
+// Classic fanning tends to generate long, skinny triangles, which are expensive to draw since they
+// have a longer perimeter to rasterize and antialias.
+//
+// The indices array indexes the fan's points (think: glDrawElements), and must have at least
+// log3(indexCount) elements past the end for this method to use as scratch space.
+//
+// Returns the next triangle instance after the final one emitted.
+static TriangleInstance* emit_recursive_fan(const SkTArray<SkPoint, true>& pts,
+ SkTArray<int32_t, true>& indices, int firstIndex,
+ int indexCount, const Sk2f& atlasOffset,
+ TriangleInstance out[]) {
+ if (indexCount < 3) {
+ return out; // Fewer than three indices: nothing to emit.
+ }
+
+ int32_t oneThirdCount = indexCount / 3;
+ int32_t twoThirdsCount = (2 * indexCount) / 3;
+ out++->set(pts[indices[firstIndex]], pts[indices[firstIndex + oneThirdCount]],
+ pts[indices[firstIndex + twoThirdsCount]], atlasOffset);
+
+ out = emit_recursive_fan(pts, indices, firstIndex, oneThirdCount + 1, atlasOffset, out); // Side from index 0 to n/3, inclusive.
+ out = emit_recursive_fan(pts, indices, firstIndex + oneThirdCount, // Side from n/3 to n*2/3, inclusive.
+ twoThirdsCount - oneThirdCount + 1, atlasOffset, out);
+
+ int endIndex = firstIndex + indexCount; // One past this fan's last index.
+ int32_t oldValue = indices[endIndex]; // Saved so the slot can be restored after the recursion.
+ indices[endIndex] = indices[firstIndex]; // Temporarily append the first index so the last side wraps around to it.
+ out = emit_recursive_fan(pts, indices, firstIndex + twoThirdsCount,
+ indexCount - twoThirdsCount + 1, atlasOffset, out);
+ indices[endIndex] = oldValue;
+
+ return out;
+}
+
+bool GrCCPathParser::finalize(GrOnFlushResourceProvider* onFlushRP) { // Builds the shared instance buffer; returns false if the buffer could not be created.
+ SkASSERT(!fParsingPath); // Call saveParsedPath() or discardParsedPath().
+ SkASSERT(fCoverageCountBatches.back().fEndNonScissorIndices == // Call closeCurrentBatch().
+ fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored]);
+ SkASSERT(fCoverageCountBatches.back().fEndScissorSubBatchIdx == fScissorSubBatches.count());
+
+ // Here we build a single instance buffer to share with every internal batch.
+ //
+ // CCPR processes 3 different types of primitives: triangles, quadratics, cubics. Each primitive
+ // type is further divided into instances that require a scissor and those that don't. This
+ // leaves us with 3*2 = 6 independent instance arrays to build for the GPU.
+ //
+ // Rather than place each instance array in its own GPU buffer, we allocate a single
+ // megabuffer and lay them all out side-by-side. We can offset the "baseInstance" parameter in
+ // our draw calls to direct the GPU to the applicable elements within a given array.
+ //
+ // We already know how big to make each of the 6 arrays from fTotalPrimitiveCounts, so layout is
+ // straightforward. Start with triangles and quadratics. They both view the instance buffer as
+ // an array of TriangleInstance[], so we can begin at zero and lay them out one after the other.
+ fBaseInstances[0].fTriangles = 0;
+ fBaseInstances[1].fTriangles = fBaseInstances[0].fTriangles +
+ fTotalPrimitiveCounts[0].fTriangles;
+ fBaseInstances[0].fQuadratics = fBaseInstances[1].fTriangles +
+ fTotalPrimitiveCounts[1].fTriangles;
+ fBaseInstances[1].fQuadratics = fBaseInstances[0].fQuadratics +
+ fTotalPrimitiveCounts[0].fQuadratics;
+ int triEndIdx = fBaseInstances[1].fQuadratics + fTotalPrimitiveCounts[1].fQuadratics; // One past the last TriangleInstance slot.
+
+ // Cubics view the same instance buffer as an array of CubicInstance[]. So, reinterpreting the
+ // instance data as CubicInstance[], we start them on the first index that will not overwrite
+ // previous TriangleInstance data.
+ int cubicBaseIdx =
+ GR_CT_DIV_ROUND_UP(triEndIdx * sizeof(TriangleInstance), sizeof(CubicInstance));
+ fBaseInstances[0].fCubics = cubicBaseIdx;
+ fBaseInstances[1].fCubics = fBaseInstances[0].fCubics + fTotalPrimitiveCounts[0].fCubics;
+ int cubicEndIdx = fBaseInstances[1].fCubics + fTotalPrimitiveCounts[1].fCubics; // One past the last CubicInstance slot; also sets the buffer size.
+
+ fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
+ cubicEndIdx * sizeof(CubicInstance));
+ if (!fInstanceBuffer) {
+ return false; // Buffer creation failed.
+ }
+
+ TriangleInstance* triangleInstanceData = static_cast<TriangleInstance*>(fInstanceBuffer->map());
+ CubicInstance* cubicInstanceData = reinterpret_cast<CubicInstance*>(triangleInstanceData); // Same mapping, viewed as CubicInstance[].
+ SkASSERT(cubicInstanceData); // NOTE(review): the map() result is only checked by this assert -- confirm it cannot be null in release builds.
+
+ PathInfo* currPathInfo = fPathsInfo.begin();
+ float atlasOffsetX = 0.0, atlasOffsetY = 0.0;
+ Sk2f atlasOffset;
+ int ptsIdx = -1; // Index into pts of the most recently consumed point.
+ PrimitiveTallies instanceIndices[2] = {fBaseInstances[0], fBaseInstances[1]}; // Per-scissor-mode write cursors, starting at each array's base.
+ PrimitiveTallies* currIndices = nullptr;
+ SkSTArray<256, int32_t, true> currFan; // Indices into pts of the current contour's fan vertices.
+
+ const SkTArray<SkPoint, true>& pts = fGeometry.points();
+
+ // Expand the ccpr verbs into GPU instance buffers.
+ for (GrCCGeometry::Verb verb : fGeometry.verbs()) {
+ switch (verb) {
+ case GrCCGeometry::Verb::kBeginPath:
+ SkASSERT(currFan.empty());
+ currIndices = &instanceIndices[(int)currPathInfo->fScissorMode]; // Select the cursors for this path's scissor mode.
+ atlasOffsetX = static_cast<float>(currPathInfo->fAtlasOffsetX);
+ atlasOffsetY = static_cast<float>(currPathInfo->fAtlasOffsetY);
+ atlasOffset = {atlasOffsetX, atlasOffsetY};
+ ++currPathInfo;
+ continue;
+
+ case GrCCGeometry::Verb::kBeginContour:
+ SkASSERT(currFan.empty());
+ currFan.push_back(++ptsIdx);
+ continue;
+
+ case GrCCGeometry::Verb::kLineTo:
+ SkASSERT(!currFan.empty());
+ currFan.push_back(++ptsIdx);
+ continue;
+
+ case GrCCGeometry::Verb::kMonotonicQuadraticTo:
+ SkASSERT(!currFan.empty());
+ triangleInstanceData[currIndices->fQuadratics++].set(&pts[ptsIdx], atlasOffset); // Instance data starts at the current point.
+ currFan.push_back(ptsIdx += 2); // Only the curve's last point joins the fan.
+ continue;
+
+ case GrCCGeometry::Verb::kMonotonicCubicTo:
+ SkASSERT(!currFan.empty());
+ cubicInstanceData[currIndices->fCubics++].set(&pts[ptsIdx], atlasOffsetX,
+ atlasOffsetY);
+ currFan.push_back(ptsIdx += 3); // Only the curve's last point joins the fan.
+ continue;
+
+ case GrCCGeometry::Verb::kEndClosedContour: // endPt == startPt.
+ SkASSERT(!currFan.empty());
+ currFan.pop_back(); // Drop the end point, which duplicates the start point.
+ // fallthru.
+ case GrCCGeometry::Verb::kEndOpenContour: // endPt != startPt.
+ if (currFan.count() >= 3) {
+ int fanSize = currFan.count();
+ // Reserve space for emit_recursive_fan. Technically this can grow to
+ // fanSize + log3(fanSize), but we approximate with log2.
+ currFan.push_back_n(SkNextLog2(fanSize));
+ SkDEBUGCODE(TriangleInstance* end =)
+ emit_recursive_fan(pts, currFan, 0, fanSize, atlasOffset,
+ triangleInstanceData + currIndices->fTriangles);
+ currIndices->fTriangles += fanSize - 2; // A fan of N points is expected to yield N - 2 triangles (asserted below).
+ SkASSERT(triangleInstanceData + currIndices->fTriangles == end);
+ }
+ currFan.reset();
+ continue;
+ }
+ }
+
+ fInstanceBuffer->unmap();
+
+ SkASSERT(currPathInfo == fPathsInfo.end());
+ SkASSERT(ptsIdx == pts.count() - 1);
+ SkASSERT(instanceIndices[0].fTriangles == fBaseInstances[1].fTriangles); // Each cursor must land exactly where the next array begins.
+ SkASSERT(instanceIndices[1].fTriangles == fBaseInstances[0].fQuadratics);
+ SkASSERT(instanceIndices[0].fQuadratics == fBaseInstances[1].fQuadratics);
+ SkASSERT(instanceIndices[1].fQuadratics == triEndIdx);
+ SkASSERT(instanceIndices[0].fCubics == fBaseInstances[1].fCubics);
+ SkASSERT(instanceIndices[1].fCubics == cubicEndIdx);
+
+ fMeshesScratchBuffer.reserve(fMaxMeshesPerDraw); // Scratch arrays are filled by drawRenderPass().
+ fDynamicStatesScratchBuffer.reserve(fMaxMeshesPerDraw);
+
+ return true;
+}
+
+void GrCCPathParser::drawCoverageCount(GrOpFlushState* flushState, CoverageCountBatchID batchID,
+ const SkIRect& drawBounds) const { // Issues every render pass for one batch: triangles, then quadratics, then cubics.
+ using RenderPass = GrCCCoverageProcessor::RenderPass;
+
+ SkASSERT(fInstanceBuffer); // Requires a successful finalize().
+
+ GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrPipeline::ScissorState::kEnabled, // Scissor enabled; one pipeline shared by all passes.
+ SkBlendMode::kPlus);
+
+ // Triangles.
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleHulls,
+ &PrimitiveTallies::fTriangles, drawBounds);
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleEdges,
+ &PrimitiveTallies::fTriangles, drawBounds); // Might get skipped.
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleCorners,
+ &PrimitiveTallies::fTriangles, drawBounds);
+
+ // Quadratics.
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticHulls,
+ &PrimitiveTallies::fQuadratics, drawBounds);
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticCorners,
+ &PrimitiveTallies::fQuadratics, drawBounds);
+
+ // Cubics.
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicHulls,
+ &PrimitiveTallies::fCubics, drawBounds);
+ this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicCorners,
+ &PrimitiveTallies::fCubics, drawBounds);
+}
+
+void GrCCPathParser::drawRenderPass(GrOpFlushState* flushState, const GrPipeline& pipeline,
+ CoverageCountBatchID batchID,
+ GrCCCoverageProcessor::RenderPass renderPass,
+ int PrimitiveTallies::*instanceType,
+ const SkIRect& drawBounds) const { // Draws one render pass over the batch's instances of the type selected by instanceType.
+ SkASSERT(pipeline.getScissorState().enabled());
+
+ if (!GrCCCoverageProcessor::DoesRenderPass(renderPass, *flushState->caps().shaderCaps())) {
+ return; // DoesRenderPass() rejected this pass for the current shader caps.
+ }
+
+ // Don't call reset(), as that also resets the reserve count.
+ fMeshesScratchBuffer.pop_back_n(fMeshesScratchBuffer.count());
+ fDynamicStatesScratchBuffer.pop_back_n(fDynamicStatesScratchBuffer.count());
+
+ GrCCCoverageProcessor proc(flushState->resourceProvider(), renderPass,
+ *flushState->caps().shaderCaps());
+
+ SkASSERT(batchID > 0); // Entry 0 is the placeholder batch pushed by the constructor.
+ SkASSERT(batchID < fCoverageCountBatches.count());
+ const CoverageCountBatch& previousBatch = fCoverageCountBatches[batchID - 1]; // This batch starts where the previous one ended.
+ const CoverageCountBatch& batch = fCoverageCountBatches[batchID];
+
+ if (int instanceCount = batch.fEndNonScissorIndices.*instanceType - // Non-scissored instances: at most one mesh.
+ previousBatch.fEndNonScissorIndices.*instanceType) {
+ SkASSERT(instanceCount > 0);
+ int baseInstance = fBaseInstances[(int)ScissorMode::kNonScissored].*instanceType +
+ previousBatch.fEndNonScissorIndices.*instanceType;
+ proc.appendMesh(fInstanceBuffer.get(), instanceCount, baseInstance, &fMeshesScratchBuffer);
+ fDynamicStatesScratchBuffer.push_back().fScissorRect.setXYWH(0, 0, drawBounds.width(), // Scissor is drawBounds' size, anchored at the origin.
+ drawBounds.height());
+ }
+
+ SkASSERT(previousBatch.fEndScissorSubBatchIdx > 0);
+ SkASSERT(batch.fEndScissorSubBatchIdx <= fScissorSubBatches.count());
+ int baseScissorInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
+ for (int i = previousBatch.fEndScissorSubBatchIdx; i < batch.fEndScissorSubBatchIdx; ++i) { // Scissored instances: one mesh per sub-batch.
+ const ScissorSubBatch& previousSubBatch = fScissorSubBatches[i - 1];
+ const ScissorSubBatch& scissorSubBatch = fScissorSubBatches[i];
+ int startIndex = previousSubBatch.fEndPrimitiveIndices.*instanceType;
+ int instanceCount = scissorSubBatch.fEndPrimitiveIndices.*instanceType - startIndex;
+ if (!instanceCount) {
+ continue; // This sub-batch has no instances of this type.
+ }
+ SkASSERT(instanceCount > 0);
+ proc.appendMesh(fInstanceBuffer.get(), instanceCount,
+ baseScissorInstance + startIndex, &fMeshesScratchBuffer);
+ fDynamicStatesScratchBuffer.push_back().fScissorRect = scissorSubBatch.fScissor;
+ }
+
+ SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count()); // One dynamic state per mesh.
+ SkASSERT(fMeshesScratchBuffer.count() <= fMaxMeshesPerDraw);
+
+ if (!fMeshesScratchBuffer.empty()) {
+ SkASSERT(flushState->rtCommandBuffer());
+ flushState->rtCommandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
+ fDynamicStatesScratchBuffer.begin(),
+ fMeshesScratchBuffer.count(), SkRect::Make(drawBounds));
+ }
+}