No chop at y extrema for cubics

The new Delta AA scan converter does not need the edge to be updated
with monotonic Y so chopping at y extrema is not necessary. Removing
such chopping brings ~10% performance increase to chalkboard.svg which
has tons of small cubics (the same is true for many svgs I saw).

We didn't remove the chopping for quads because that does not bring
a significant speedup. Moreover, dropping those y extremas would make
our strokecircle animation look a little more wobbly (because we would
have fewer divisions for the quads at the top and bottom of the circle).

Bug: skia:
Change-Id: I3984d2619f9f77269ed24e8cbfa9f1429ebca4a8
Reviewed-on: https://skia-review.googlesource.com/31940
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Yuqian Li <liyuqian@google.com>
diff --git a/src/core/SkAnalyticEdge.cpp b/src/core/SkAnalyticEdge.cpp
index 7cb6493..e2da090 100644
--- a/src/core/SkAnalyticEdge.cpp
+++ b/src/core/SkAnalyticEdge.cpp
@@ -20,6 +20,14 @@
     SkASSERT(fWinding == 1 || fWinding == -1);
     SkASSERT(fCurveCount != 0);
 
+    // We don't chop at y extrema for cubics so the y is not guaranteed to be increasing for them.
+    // In that case, we have to swap x/y and negate the winding.
+    if (y0 > y1) {
+        SkTSwap(x0, x1);
+        SkTSwap(y0, y1);
+        fWinding = -fWinding;
+    }
+
     SkASSERT(y0 <= y1);
 
     SkFDot6 dx = SkFixedToFDot6(x1 - x0);
@@ -48,10 +56,10 @@
     return true;
 }
 
-bool SkAnalyticEdge::update(SkFixed last_y) {
+bool SkAnalyticEdge::update(SkFixed last_y, bool sortY) {
     SkASSERT(last_y >= fLowerY); // we shouldn't update edge if last_y < fLowerY
     if (fCurveCount < 0) {
-        return static_cast<SkAnalyticCubicEdge*>(this)->updateCubic();
+        return static_cast<SkAnalyticCubicEdge*>(this)->updateCubic(sortY);
     } else if (fCurveCount > 0) {
         return static_cast<SkAnalyticQuadraticEdge*>(this)->updateQuadratic();
     }
@@ -147,10 +155,10 @@
     return success;
 }
 
-bool SkAnalyticCubicEdge::setCubic(const SkPoint pts[4]) {
+bool SkAnalyticCubicEdge::setCubic(const SkPoint pts[4], bool sortY) {
     fRiteE = nullptr;
 
-    if (!fCEdge.setCubicWithoutUpdate(pts, kDefaultAccuracy)) {
+    if (!fCEdge.setCubicWithoutUpdate(pts, kDefaultAccuracy, sortY)) {
         return false;
     }
 
@@ -174,10 +182,10 @@
 
     fSnappedY = fCEdge.fCy;
 
-    return this->updateCubic();
+    return this->updateCubic(sortY);
 }
 
-bool SkAnalyticCubicEdge::updateCubic() {
+bool SkAnalyticCubicEdge::updateCubic(bool sortY) {
     int     success;
     int     count = fCurveCount;
     SkFixed oldx = fCEdge.fCx;
@@ -205,14 +213,14 @@
 
         // we want to say SkASSERT(oldy <= newy), but our finite fixedpoint
         // doesn't always achieve that, so we have to explicitly pin it here.
-        if (newy < oldy) {
+        if (sortY && newy < oldy) {
             newy = oldy;
         }
 
         SkFixed newSnappedY = SnapY(newy);
         // we want to SkASSERT(snappedNewY <= fCEdge.fCLastY), but our finite fixedpoint
         // doesn't always achieve that, so we have to explicitly pin it here.
-        if (fCEdge.fCLastY < newSnappedY) {
+        if (sortY && fCEdge.fCLastY < newSnappedY) {
             newSnappedY = fCEdge.fCLastY;
             count = 0;
         }
diff --git a/src/core/SkAnalyticEdge.h b/src/core/SkAnalyticEdge.h
index 533d769..13f6257 100644
--- a/src/core/SkAnalyticEdge.h
+++ b/src/core/SkAnalyticEdge.h
@@ -81,7 +81,7 @@
     inline bool updateLine(SkFixed ax, SkFixed ay, SkFixed bx, SkFixed by, SkFixed slope);
 
     // return true if we're NOT done with this edge
-    bool update(SkFixed last_y);
+    bool update(SkFixed last_y, bool sortY = true);
 
 #ifdef SK_DEBUG
     void dump() const {
@@ -124,8 +124,8 @@
 
     SkFixed fSnappedY; // to make sure that y is increasing with smooth jump and snapping
 
-    bool setCubic(const SkPoint pts[4]);
-    bool updateCubic();
+    bool setCubic(const SkPoint pts[4], bool sortY = true);
+    bool updateCubic(bool sortY = true);
     inline void keepContinuous() {
         SkASSERT(SkAbs32(fX - SkFixedMul(fDX, fY - SnapY(fCEdge.fCy)) - fCEdge.fCx) < SK_Fixed1);
         fCEdge.fCx = fX;
@@ -223,7 +223,11 @@
     SkPoint fP3;
 
     bool set(const SkPoint pts[4]){
-        if (IsEmpty(pts[0].fY, pts[3].fY)) {
+        // We do not chop at y extrema for cubics so pts[0], pts[1], pts[2], pts[3] may not be
+        // monotonic. Therefore, we have to check the emptiness for all three pairs, instead of just
+        // checking IsEmpty(pts[0].fY, pts[3].fY).
+        if (IsEmpty(pts[0].fY, pts[1].fY) && IsEmpty(pts[1].fY, pts[2].fY) &&
+                IsEmpty(pts[2].fY, pts[3].fY)) {
             return false;
         }
         fCount = 4;
diff --git a/src/core/SkEdge.cpp b/src/core/SkEdge.cpp
index f8e8d98..1bb8e9e 100644
--- a/src/core/SkEdge.cpp
+++ b/src/core/SkEdge.cpp
@@ -347,7 +347,7 @@
     return SkMax32(SkAbs32(oneThird), SkAbs32(twoThird));
 }
 
-bool SkCubicEdge::setCubicWithoutUpdate(const SkPoint pts[4], int shift) {
+bool SkCubicEdge::setCubicWithoutUpdate(const SkPoint pts[4], int shift, bool sortY) {
     SkFDot6 x0, y0, x1, y1, x2, y2, x3, y3;
 
     {
@@ -374,7 +374,7 @@
     }
 
     int winding = 1;
-    if (y0 > y3)
+    if (sortY && y0 > y3)
     {
         SkTSwap(x0, x3);
         SkTSwap(x1, x2);
@@ -387,7 +387,7 @@
     int bot = SkFDot6Round(y3);
 
     // are we a zero-height cubic (line)?
-    if (top == bot)
+    if (sortY && top == bot)
         return 0;
 
     // compute number of steps needed (1 << shift)
diff --git a/src/core/SkEdge.h b/src/core/SkEdge.h
index 9be9325..509c0c4 100644
--- a/src/core/SkEdge.h
+++ b/src/core/SkEdge.h
@@ -80,7 +80,7 @@
     SkFixed fCDDDx, fCDDDy;
     SkFixed fCLastX, fCLastY;
 
-    bool setCubicWithoutUpdate(const SkPoint pts[4], int shiftUp);
+    bool setCubicWithoutUpdate(const SkPoint pts[4], int shiftUp, bool sortY = true);
     int setCubic(const SkPoint pts[4], int shiftUp);
     int updateCubic();
 };
diff --git a/src/core/SkEdgeBuilder.cpp b/src/core/SkEdgeBuilder.cpp
index 7a02187..1ba9e1d 100644
--- a/src/core/SkEdgeBuilder.cpp
+++ b/src/core/SkEdgeBuilder.cpp
@@ -427,6 +427,10 @@
                     }
                 } break;
                 case SkPath::kCubic_Verb: {
+                    if (fEdgeType == kBezier) {
+                        this->addCubic(pts);
+                        break;
+                    }
                     SkPoint monoY[10];
                     int n = SkChopCubicAtYExtrema(pts, monoY);
                     for (int i = 0; i <= n; i++) {
diff --git a/src/core/SkEdgeBuilder.h b/src/core/SkEdgeBuilder.h
index b876fcf..a549e15 100644
--- a/src/core/SkEdgeBuilder.h
+++ b/src/core/SkEdgeBuilder.h
@@ -21,8 +21,19 @@
 class SkEdgeBuilder {
 public:
     enum EdgeType {
+        // Used in supersampling or non-AA scan coverter; it stores only integral y coordinates.
         kEdge,
+
+        // Used in Analytic AA scan converter; it uses SkFixed to store fractional y.
         kAnalyticEdge,
+
+        // Used in Delta AA scan converter; it's a super-light wrapper of SkPoint, which can then be
+        // used to construct SkAnalyticEdge (kAnalyticEdge) later. We use kBezier to save the memory
+        // allocation time (a SkBezier is much lighter than SkAnalyticEdge or SkEdge). Note that
+        // Delta AA only has to deal with one SkAnalyticEdge at a time (whereas Analytic AA has to
+        // deal with all SkAnalyticEdges at the same time). Thus for Delta AA, we only need to
+        // allocate memory for n SkBeziers and 1 SkAnalyticEdge. (Analytic AA need to allocate
+        // memory for n SkAnalyticEdges.)
         kBezier
     };
 
@@ -76,6 +87,8 @@
     void addCubic(const SkPoint pts[]);
     void addClipper(SkEdgeClipper*);
 
+    EdgeType edgeType() const { return fEdgeType; }
+
     int buildPoly(const SkPath& path, const SkIRect* clip, int shiftUp, bool clipToTheRight);
 
     inline void addPolyLine(SkPoint pts[], char* &edge, size_t edgeSize, char** &edgePtr,
diff --git a/src/core/SkScan_DAAPath.cpp b/src/core/SkScan_DAAPath.cpp
index 68bc15d..de24e49 100644
--- a/src/core/SkScan_DAAPath.cpp
+++ b/src/core/SkScan_DAAPath.cpp
@@ -226,21 +226,27 @@
         SkAnalyticEdge* currE   = &storage;
         bool edgeSet            = false;
 
+        int originalWinding = 1;
+        bool sortY = true;
         switch (bezier->fCount) {
             case 2: {
                 edgeSet = currE->setLine(bezier->fP0, bezier->fP1);
+                originalWinding = currE->fWinding;
                 break;
             }
             case 3: {
                 SkQuad* quad = static_cast<SkQuad*>(bezier);
                 SkPoint pts[3] = {quad->fP0, quad->fP1, quad->fP2};
                 edgeSet = static_cast<SkAnalyticQuadraticEdge*>(currE)->setQuadratic(pts);
+                originalWinding = static_cast<SkAnalyticQuadraticEdge*>(currE)->fQEdge.fWinding;
                 break;
             }
             case 4: {
+                sortY = false;
                 SkCubic* cubic = static_cast<SkCubic*>(bezier);
                 SkPoint pts[4] = {cubic->fP0, cubic->fP1, cubic->fP2, cubic->fP3};
-                edgeSet = static_cast<SkAnalyticCubicEdge*>(currE)->setCubic(pts);
+                edgeSet = static_cast<SkAnalyticCubicEdge*>(currE)->setCubic(pts, sortY);
+                originalWinding = static_cast<SkAnalyticCubicEdge*>(currE)->fCEdge.fWinding;
                 break;
             }
         }
@@ -259,7 +265,9 @@
             if (lowerCeil <= upperFloor + SK_Fixed1) { // only one row is affected by the currE
                 SkFixed rowHeight = currE->fLowerY - currE->fUpperY;
                 SkFixed nextX = currE->fX + SkFixedMul(currE->fDX, rowHeight);
-                add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
+                if (iy >= clipRect.fTop && iy < clipRect.fBottom) {
+                    add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
+                }
                 continue;
             }
 
@@ -296,12 +304,13 @@
             }
 
             // last partial row
-            if (SkIntToFixed(iy) < currE->fLowerY) {
+            if (SkIntToFixed(iy) < currE->fLowerY && iy >= clipRect.fTop && iy < clipRect.fBottom) {
                 rowHeight = currE->fLowerY - SkIntToFixed(iy);
                 nextX = currE->fX + SkFixedMul(currE->fDX, rowHeight);
                 add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
             }
-        } while (currE->update(currE->fLowerY));
+        // Intended assignment to fWinding to restore the maybe-negated winding (during updateLine)
+        } while ((currE->fWinding = originalWinding) && currE->update(currE->fLowerY, sortY));
     }
 }