Revert "Simplify fill path call by removing do_fill_path"

This reverts commit 12f322b9d4e98619bd128f39b02d3a6f3b78ba79.

Reason for revert: Unexpectedly breaks Chrome layout tests. Will check why.

Original change's description:
> Simplify fill path call by removing do_fill_path
> 
> The git diff is not very informative for this CL.
> Here's a better diff:
> 
> 1. do_fill_path is removed and its content is copied to AntiFillPath
> 
> 2. Any call to do_fill_path is removed.
> 
> 3. std::function FillPathFunc is removed (and replaced by direct
>    AAAFillPath, DAAFillPath, and SAAFillPath call).
> 
> 4. The old call chain is:
>      AntiFillPath -> (AAAFillPath/DAAFillPath/...)
>                   -> do_fill_path
>                   -> specific FillPathFunc
>    The new call chain is:
>      AntiFillPath -> AAAFillPath/DAAFillPath/SAAFillPath
> 
> This is made possible by the removal of SK_SUPPORT_LEGACY_AA_CHOICE
> which makes sure that AntiFillPath is the only function that makes
> the choice of AAA/DAA/SAA.
> 
> In the next CL, I'll improve the structure of SkScan::AntiFillPath
> to prepare for Threaded Backend's init-once change.
> 
> Bug: skia:
> Change-Id: If6ebbdab207cadb7bfe2cb3fcf33ea3d180c3896
> Reviewed-on: https://skia-review.googlesource.com/67340
> Reviewed-by: Mike Reed <reed@google.com>
> Reviewed-by: Cary Clark <caryclark@google.com>
> Commit-Queue: Yuqian Li <liyuqian@google.com>

TBR=caryclark@google.com,liyuqian@google.com,reed@google.com,caryclark@skia.org

Change-Id: I7d9517574265db5bc372a5749e6480df8e938f2e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/67855
Reviewed-by: Yuqian Li <liyuqian@google.com>
Commit-Queue: Yuqian Li <liyuqian@google.com>
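
For reference, a minimal standalone sketch of the call-chain change this revert undoes. Every type and signature below is a simplified stand-in for illustration, not Skia's real API; only the names AntiFillPath, SAAFillPath, do_fill_path, and FillPathFunc come from the CL description above.

    #include <cstdio>
    #include <functional>

    struct Path {};      // stand-in for SkPath
    struct Clip {};      // stand-in for SkRegion
    struct Blitter {};   // stand-in for SkBlitter

    // Old structure (restored by this revert): each *FillPath wraps its scan
    // converter in a FillPathFunc and routes it through the shared do_fill_path.
    using FillPathFunc = std::function<void(const Path&, Blitter*)>;

    static void do_fill_path(const Path& path, const Clip&, Blitter* blitter,
                             FillPathFunc fill) {
        // shared setup: bounds, clip limiting, inverse handling ... (elided)
        fill(path, blitter);
    }

    static void SAAFillPath_old(const Path& path, const Clip& clip, Blitter* b) {
        do_fill_path(path, clip, b,
                     [](const Path&, Blitter*) { std::puts("SAA scan"); });
    }

    // New structure (introduced by the reverted CL): AntiFillPath does the
    // shared setup once and calls the scan converters directly.
    static void SAAFillPath_new(const Path&, Blitter*) { std::puts("SAA scan"); }

    static void AntiFillPath_new(const Path& path, const Clip&, Blitter* b) {
        // shared setup would live here, then direct dispatch
        SAAFillPath_new(path, b);
    }

    int main() {
        Path p; Clip c; Blitter b;
        SAAFillPath_old(p, c, &b);
        AntiFillPath_new(p, c, &b);
        return 0;
    }
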
diff --git a/src/core/SkScan.h b/src/core/SkScan.h
index 3b04333..8bb9d1f 100644
--- a/src/core/SkScan.h
+++ b/src/core/SkScan.h
@@ -89,12 +89,10 @@
                               const SkRegion*, SkBlitter*);
     static void HairLineRgn(const SkPoint[], int count, const SkRegion*, SkBlitter*);
     static void AntiHairLineRgn(const SkPoint[], int count, const SkRegion*, SkBlitter*);
-    static void AAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& pathIR,
-                            const SkIRect& clipBounds, bool forceRLE);
-    static void DAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& pathIR,
-                            const SkIRect& clipBounds, bool forceRLE);
-    static void SAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& pathIR,
-                            const SkIRect& clipBounds, bool forceRLE);
+    static void AAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+                            bool forceRLE = false); // SkAAClip uses forceRLE
+    static void DAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+                            bool forceRLE = false);
 };
 
 /** Assign an SkXRect from a SkIRect, by promoting the src rect's coordinates
diff --git a/src/core/SkScanPriv.h b/src/core/SkScanPriv.h
index 544f5f5d..96ee695 100644
--- a/src/core/SkScanPriv.h
+++ b/src/core/SkScanPriv.h
@@ -78,6 +78,45 @@
     return prev;
 }
 
+static bool fitsInsideLimit(const SkRect& r, SkScalar max) {
+    const SkScalar min = -max;
+    return  r.fLeft > min && r.fTop > min &&
+            r.fRight < max && r.fBottom < max;
+}
+
+static int overflows_short_shift(int value, int shift) {
+    const int s = 16 + shift;
+    return (SkLeftShift(value, s) >> s) - value;
+}
+
+/**
+  Would any of the coordinates of this rectangle not fit in a short,
+  when left-shifted by shift?
+*/
+static int rect_overflows_short_shift(SkIRect rect, int shift) {
+    SkASSERT(!overflows_short_shift(8191, shift));
+    SkASSERT(overflows_short_shift(8192, shift));
+    SkASSERT(!overflows_short_shift(32767, 0));
+    SkASSERT(overflows_short_shift(32768, 0));
+
+    // Since we expect these to succeed, we bit-or together
+    // for a tiny extra bit of speed.
+    return overflows_short_shift(rect.fLeft, shift) |
+           overflows_short_shift(rect.fRight, shift) |
+           overflows_short_shift(rect.fTop, shift) |
+           overflows_short_shift(rect.fBottom, shift);
+}
+
+static bool safeRoundOut(const SkRect& src, SkIRect* dst, int32_t maxInt) {
+    const SkScalar maxScalar = SkIntToScalar(maxInt);
+
+    if (fitsInsideLimit(src, maxScalar)) {
+        src.roundOut(dst);
+        return true;
+    }
+    return false;
+}
+
 // Check if the path is a rect and fat enough after clipping; if so, blit it.
 static inline bool TryBlitFatAntiRect(SkBlitter* blitter, const SkPath& path, const SkIRect& clip) {
     SkRect rect;
@@ -95,4 +134,93 @@
     return true;
 }
 
+using FillPathFunc = std::function<void(const SkPath& path, SkBlitter* blitter, bool isInverse,
+        const SkIRect& ir, const SkIRect& clipBounds, bool containedInClip, bool forceRLE)>;
+
+static inline void do_fill_path(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+        bool forceRLE, const int SHIFT, FillPathFunc fillPathFunc) {
+    if (origClip.isEmpty()) {
+        return;
+    }
+
+    const bool isInverse = path.isInverseFillType();
+    SkIRect ir;
+
+    if (!safeRoundOut(path.getBounds(), &ir, SK_MaxS32 >> SHIFT)) {
+        // Bounds can't fit in SkIRect; we'll return without drawing
+        return;
+    }
+    if (ir.isEmpty()) {
+        if (isInverse) {
+            blitter->blitRegion(origClip);
+        }
+        return;
+    }
+
+    // If the intersection of the path bounds and the clip bounds
+    // will overflow 32767 when << by SHIFT, we can't supersample,
+    // so draw without antialiasing.
+    SkIRect clippedIR;
+    if (isInverse) {
+       // If the path is an inverse fill, it's going to fill the entire
+       // clip, and we care whether the entire clip exceeds our limits.
+       clippedIR = origClip.getBounds();
+    } else {
+       if (!clippedIR.intersect(ir, origClip.getBounds())) {
+           return;
+       }
+    }
+    if (rect_overflows_short_shift(clippedIR, SHIFT)) {
+        SkScan::FillPath(path, origClip, blitter);
+        return;
+    }
+
+    // Our antialiasing can't handle a clip larger than 32767, so we restrict
+    // the clip to that limit here. (the runs[] uses int16_t for its index).
+    //
+    // A more general solution (one that could also eliminate the need to
+    // disable aa based on ir bounds (see overflows_short_shift)) would be
+    // to tile the clip/target...
+    SkRegion tmpClipStorage;
+    const SkRegion* clipRgn = &origClip;
+    {
+        static const int32_t kMaxClipCoord = 32767;
+        const SkIRect& bounds = origClip.getBounds();
+        if (bounds.fRight > kMaxClipCoord || bounds.fBottom > kMaxClipCoord) {
+            SkIRect limit = { 0, 0, kMaxClipCoord, kMaxClipCoord };
+            tmpClipStorage.op(origClip, limit, SkRegion::kIntersect_Op);
+            clipRgn = &tmpClipStorage;
+        }
+    }
+    // for here down, use clipRgn, not origClip
+
+    SkScanClipper   clipper(blitter, clipRgn, ir);
+    const SkIRect*  clipRect = clipper.getClipRect();
+
+    if (clipper.getBlitter() == nullptr) { // clipped out
+        if (isInverse) {
+            blitter->blitRegion(*clipRgn);
+        }
+        return;
+    }
+
+    SkASSERT(clipper.getClipRect() == nullptr ||
+            *clipper.getClipRect() == clipRgn->getBounds());
+
+    // now use the (possibly wrapped) blitter
+    blitter = clipper.getBlitter();
+
+    if (isInverse) {
+        sk_blit_above(blitter, ir, *clipRgn);
+    }
+
+    SkASSERT(SkIntToScalar(ir.fTop) <= path.getBounds().fTop);
+
+    fillPathFunc(path, blitter, isInverse, ir, clipRgn->getBounds(), clipRect == nullptr, forceRLE);
+
+    if (isInverse) {
+        sk_blit_below(blitter, ir, *clipRgn);
+    }
+}
+
 #endif
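
The overflow test restored above works by a shift round trip: if value << shift fits in a signed 16-bit short, shifting up by 16 + shift and arithmetically back down is lossless, so the difference is zero. A self-contained sketch of that arithmetic, using a plain cast where Skia uses SkLeftShift and the same sample values as the asserts:

    #include <cstdint>
    #include <cstdio>

    // Non-zero iff (value << shift) does not fit in a signed 16-bit short.
    // The unsigned cast stands in for SkLeftShift; the right shift assumes the
    // usual arithmetic shift of negative values.
    static int overflows_short_shift(int32_t value, int shift) {
        const int s = 16 + shift;
        return ((int32_t)((uint32_t)value << s) >> s) - value;
    }

    int main() {
        // SHIFT == 2 during supersampling: 8191 << 2 fits in a short, 8192 << 2 does not.
        std::printf("%d %d\n", overflows_short_shift(8191, 2),   // prints 0
                               overflows_short_shift(8192, 2));  // prints non-zero
        // shift == 0: the plain int16_t limit of 32767 applies.
        std::printf("%d %d\n", overflows_short_shift(32767, 0),  // prints 0
                               overflows_short_shift(32768, 0)); // prints non-zero
        return 0;
    }
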
diff --git a/src/core/SkScan_AAAPath.cpp b/src/core/SkScan_AAAPath.cpp
index c1f4f41..d62b151 100644
--- a/src/core/SkScan_AAAPath.cpp
+++ b/src/core/SkScan_AAAPath.cpp
@@ -1672,46 +1672,48 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-void SkScan::AAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& ir,
-                         const SkIRect& clipBounds, bool forceRLE) {
-    bool isInverse = path.isInverseFillType();
-    bool containedInClip = clipBounds.contains(ir);
-
-    // The mask blitter (where we store intermediate alpha values directly in a mask, and then call
-    // the real blitter once in the end to blit the whole mask) is faster than the RLE blitter when
-    // the blit region is small enough (i.e., canHandleRect(ir)). When isInverse is true, the blit
-    // region is no longer the rectangle ir so we won't use the mask blitter. The caller may also
-    // use the forceRLE flag to force not using the mask blitter. Also, when the path is a simple
-    // rect, preparing a mask and blitting it might have too much overhead. Hence we'll use
-    // blitFatAntiRect to avoid the mask and its overhead.
-    if (MaskAdditiveBlitter::canHandleRect(ir) && !isInverse && !forceRLE) {
+void SkScan::AAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+                         bool forceRLE) {
+    FillPathFunc fillPathFunc = [](const SkPath& path, SkBlitter* blitter, bool isInverse,
+            const SkIRect& ir, const SkIRect& clipBounds, bool containedInClip, bool forceRLE){
+        // The mask blitter (where we store intermediate alpha values directly in a mask, and then
+        // call the real blitter once in the end to blit the whole mask) is faster than the RLE
+        // blitter when the blit region is small enough (i.e., canHandleRect(ir)).
+        // When isInverse is true, the blit region is no longer ir so we won't use the mask blitter.
+        // The caller may also use the forceRLE flag to force not using the mask blitter.
+        // Also, when the path is a simple rect, preparing a mask and blitting it might have too
+        // much overhead. Hence we'll use blitFatAntiRect to avoid the mask and its overhead.
+        if (MaskAdditiveBlitter::canHandleRect(ir) && !isInverse && !forceRLE) {
 #ifdef SK_SUPPORT_LEGACY_SMALLRECT_AA
-        MaskAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
-        aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
-                containedInClip, true, forceRLE);
-#else
-        // blitFatAntiRect is slower than the normal AAA flow without MaskAdditiveBlitter.
-        // Hence only tryBlitFatAntiRect when MaskAdditiveBlitter would have been used.
-        if (!TryBlitFatAntiRect(blitter, path, clipBounds)) {
             MaskAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
             aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
                     containedInClip, true, forceRLE);
-        }
+#else
+            // blitFatAntiRect is slower than the normal AAA flow without MaskAdditiveBlitter.
+            // Hence only tryBlitFatAntiRect when MaskAdditiveBlitter would have been used.
+            if (!TryBlitFatAntiRect(blitter, path, clipBounds)) {
+                MaskAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
+                aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
+                        containedInClip, true, forceRLE);
+            }
 #endif
-    } else if (!isInverse && path.isConvex()) {
-        // If the filling area is convex (i.e., path.isConvex && !isInverse), our simpler
-        // aaa_walk_convex_edges won't generate alphas above 255. Hence we don't need
-        // SafeRLEAdditiveBlitter (which is slow due to clamping). The basic RLE blitter
-        // RunBasedAdditiveBlitter would suffice.
-        RunBasedAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
-        aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
-                containedInClip, false, forceRLE);
-    } else {
-        // If the filling area might not be convex, the more involved aaa_walk_edges would
-        // be called and we have to clamp the alpha downto 255. The SafeRLEAdditiveBlitter
-        // does that at a cost of performance.
-        SafeRLEAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
-        aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
-                containedInClip, false, forceRLE);
-    }
+        } else if (!isInverse && path.isConvex()) {
+            // If the filling area is convex (i.e., path.isConvex && !isInverse), our simpler
+            // aaa_walk_convex_edges won't generate alphas above 255. Hence we don't need
+            // SafeRLEAdditiveBlitter (which is slow due to clamping). The basic RLE blitter
+            // RunBasedAdditiveBlitter would suffice.
+            RunBasedAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
+            aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
+                    containedInClip, false, forceRLE);
+        } else {
+            // If the filling area might not be convex, the more involved aaa_walk_edges would
+            // be called and we have to clamp the alpha down to 255. The SafeRLEAdditiveBlitter
+            // does that at a cost of performance.
+            SafeRLEAdditiveBlitter additiveBlitter(blitter, ir, clipBounds, isInverse);
+            aaa_fill_path(path, clipBounds, &additiveBlitter, ir.fTop, ir.fBottom,
+                    containedInClip, false, forceRLE);
+        }
+    };
+
+    do_fill_path(path, origClip, blitter, forceRLE, 2, std::move(fillPathFunc));
 }
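
The restored AAAFillPath body above reduces to a three-way blitter choice. A hypothetical distillation of that decision (the predicate names are placeholders for MaskAdditiveBlitter::canHandleRect and friends, not Skia API):

    #include <cstdio>

    enum class AAABlitter { kMaskAdditive, kRunBasedRLE, kSafeRLE };

    // smallRect stands in for MaskAdditiveBlitter::canHandleRect(ir).
    static AAABlitter choose_aaa_blitter(bool smallRect, bool isInverse,
                                         bool forceRLE, bool isConvex) {
        if (smallRect && !isInverse && !forceRLE) {
            return AAABlitter::kMaskAdditive;  // build a mask, blit it once at the end
        }
        if (!isInverse && isConvex) {
            return AAABlitter::kRunBasedRLE;   // convex fill: alpha never exceeds 255
        }
        return AAABlitter::kSafeRLE;           // clamps alpha; correct but slower
    }

    int main() {
        // e.g. a small, non-inverse, non-forced-RLE fill picks the mask blitter
        std::printf("%d\n", (int)choose_aaa_blitter(true, false, false, true));  // 0
        return 0;
    }
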
diff --git a/src/core/SkScan_AntiPath.cpp b/src/core/SkScan_AntiPath.cpp
index 298fae0..ba42a72 100644
--- a/src/core/SkScan_AntiPath.cpp
+++ b/src/core/SkScan_AntiPath.cpp
@@ -613,153 +613,33 @@
     return path.countPoints() < SkTMax(bounds.width(), bounds.height()) / 2 - 10;
 }
 
-void SkScan::SAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& ir,
-                  const SkIRect& clipBounds, bool forceRLE) {
-    bool isInverse = path.isInverseFillType();
-    bool containedInClip = clipBounds.contains(ir);
-
-    // MaskSuperBlitter can't handle drawing outside of ir, so we can't use it
-    // if we're an inverse filltype
-    if (!isInverse && MaskSuperBlitter::CanHandleRect(ir) && !forceRLE) {
-        MaskSuperBlitter superBlit(blitter, ir, clipBounds, isInverse);
-        SkASSERT(SkIntToScalar(ir.fTop) <= path.getBounds().fTop);
-        sk_fill_path(path, clipBounds, &superBlit, ir.fTop, ir.fBottom, SHIFT, containedInClip);
-    } else {
-        SuperBlitter superBlit(blitter, ir, clipBounds, isInverse);
-        sk_fill_path(path, clipBounds, &superBlit, ir.fTop, ir.fBottom, SHIFT, containedInClip);
-    }
-}
-
-static bool fitsInsideLimit(const SkRect& r, SkScalar max) {
-    const SkScalar min = -max;
-    return  r.fLeft > min && r.fTop > min &&
-            r.fRight < max && r.fBottom < max;
-}
-
-static int overflows_short_shift(int value, int shift) {
-    const int s = 16 + shift;
-    return (SkLeftShift(value, s) >> s) - value;
-}
-
-/**
-  Would any of the coordinates of this rectangle not fit in a short,
-  when left-shifted by shift?
-*/
-static int rect_overflows_short_shift(SkIRect rect, int shift) {
-    SkASSERT(!overflows_short_shift(8191, shift));
-    SkASSERT(overflows_short_shift(8192, shift));
-    SkASSERT(!overflows_short_shift(32767, 0));
-    SkASSERT(overflows_short_shift(32768, 0));
-
-    // Since we expect these to succeed, we bit-or together
-    // for a tiny extra bit of speed.
-    return overflows_short_shift(rect.fLeft, shift) |
-           overflows_short_shift(rect.fRight, shift) |
-           overflows_short_shift(rect.fTop, shift) |
-           overflows_short_shift(rect.fBottom, shift);
-}
-
-static bool safeRoundOut(const SkRect& src, SkIRect* dst, int32_t maxInt) {
-    const SkScalar maxScalar = SkIntToScalar(maxInt);
-
-    if (fitsInsideLimit(src, maxScalar)) {
-        src.roundOut(dst);
-        return true;
-    }
-    return false;
-}
-
 void SkScan::AntiFillPath(const SkPath& path, const SkRegion& origClip,
                           SkBlitter* blitter, bool forceRLE) {
-    if (origClip.isEmpty()) {
-        return;
-    }
-
-    const bool isInverse = path.isInverseFillType();
-    SkIRect ir;
-
-    if (!safeRoundOut(path.getBounds(), &ir, SK_MaxS32 >> SHIFT)) {
-        // Bounds can't fit in SkIRect; we'll return without drawing
-        return;
-    }
-    if (ir.isEmpty()) {
-        if (isInverse) {
-            blitter->blitRegion(origClip);
-        }
-        return;
-    }
-
-    // If the intersection of the path bounds and the clip bounds
-    // will overflow 32767 when << by SHIFT, we can't supersample,
-    // so draw without antialiasing.
-    SkIRect clippedIR;
-    if (isInverse) {
-       // If the path is an inverse fill, it's going to fill the entire
-       // clip, and we care whether the entire clip exceeds our limits.
-       clippedIR = origClip.getBounds();
-    } else {
-       if (!clippedIR.intersect(ir, origClip.getBounds())) {
-           return;
-       }
-    }
-    if (rect_overflows_short_shift(clippedIR, SHIFT)) {
-        SkScan::FillPath(path, origClip, blitter);
-        return;
-    }
-
-    // Our antialiasing can't handle a clip larger than 32767, so we restrict
-    // the clip to that limit here. (the runs[] uses int16_t for its index).
-    //
-    // A more general solution (one that could also eliminate the need to
-    // disable aa based on ir bounds (see overflows_short_shift) would be
-    // to tile the clip/target...
-    SkRegion tmpClipStorage;
-    const SkRegion* clipRgn = &origClip;
-    {
-        static const int32_t kMaxClipCoord = 32767;
-        const SkIRect& bounds = origClip.getBounds();
-        if (bounds.fRight > kMaxClipCoord || bounds.fBottom > kMaxClipCoord) {
-            SkIRect limit = { 0, 0, kMaxClipCoord, kMaxClipCoord };
-            tmpClipStorage.op(origClip, limit, SkRegion::kIntersect_Op);
-            clipRgn = &tmpClipStorage;
-        }
-    }
-    // for here down, use clipRgn, not origClip
-
-    SkScanClipper   clipper(blitter, clipRgn, ir);
-
-    if (clipper.getBlitter() == nullptr) { // clipped out
-        if (isInverse) {
-            blitter->blitRegion(*clipRgn);
-        }
-        return;
-    }
-
-    SkASSERT(clipper.getClipRect() == nullptr ||
-            *clipper.getClipRect() == clipRgn->getBounds());
-
-    // now use the (possibly wrapped) blitter
-    blitter = clipper.getBlitter();
-
-    if (isInverse) {
-        sk_blit_above(blitter, ir, *clipRgn);
-    }
-
-    SkASSERT(SkIntToScalar(ir.fTop) <= path.getBounds().fTop);
-
     if (ShouldUseDAA(path)) {
-        SkScan::DAAFillPath(path, blitter, ir, clipRgn->getBounds(), forceRLE);
+        SkScan::DAAFillPath(path, origClip, blitter, forceRLE);
+        return;
     } else if (ShouldUseAAA(path)) {
         // Do not use AAA if path is too complicated:
         // there won't be any speedup or significant visual improvement.
-        SkScan::AAAFillPath(path, blitter, ir, clipRgn->getBounds(), forceRLE);
-    } else {
-        SkScan::SAAFillPath(path, blitter, ir, clipRgn->getBounds(), forceRLE);
+        SkScan::AAAFillPath(path, origClip, blitter, forceRLE);
+        return;
     }
 
-    if (isInverse) {
-        sk_blit_below(blitter, ir, *clipRgn);
-    }
+    FillPathFunc fillPathFunc = [](const SkPath& path, SkBlitter* blitter, bool isInverse,
+            const SkIRect& ir, const SkIRect& clipBounds, bool containedInClip, bool forceRLE){
+        // MaskSuperBlitter can't handle drawing outside of ir, so we can't use it
+        // if we're an inverse filltype
+        if (!isInverse && MaskSuperBlitter::CanHandleRect(ir) && !forceRLE) {
+            MaskSuperBlitter    superBlit(blitter, ir, clipBounds, isInverse);
+            SkASSERT(SkIntToScalar(ir.fTop) <= path.getBounds().fTop);
+            sk_fill_path(path, clipBounds, &superBlit, ir.fTop, ir.fBottom, SHIFT, containedInClip);
+        } else {
+            SuperBlitter    superBlit(blitter, ir, clipBounds, isInverse);
+            sk_fill_path(path, clipBounds, &superBlit, ir.fTop, ir.fBottom, SHIFT, containedInClip);
+        }
+    };
+
+    do_fill_path(path, origClip, blitter, forceRLE, SHIFT, std::move(fillPathFunc));
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/SkScan_DAAPath.cpp b/src/core/SkScan_DAAPath.cpp
index f3c7cbc..e213e34 100644
--- a/src/core/SkScan_DAAPath.cpp
+++ b/src/core/SkScan_DAAPath.cpp
@@ -315,43 +315,48 @@
     }
 }
 
-// For threaded backend with out-of-order init-once, we probably have to take care of the
-// blitRegion, sk_blit_above, sk_blit_below in SkScan::AntiFillPath to maintain the draw order. If
-// we do that, be caureful that blitRect may throw exception if the rect is empty.
-void SkScan::DAAFillPath(const SkPath& path, SkBlitter* blitter, const SkIRect& ir,
-                         const SkIRect& clipBounds, bool forceRLE) {
-    bool isEvenOdd  = path.getFillType() & 1;
-    bool isConvex   = path.isConvex();
-    bool isInverse  = path.isInverseFillType();
-    bool skipRect   = isConvex && !isInverse;
-    bool containedInClip = clipBounds.contains(ir);
+void SkScan::DAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+                         bool forceRLE) {
 
-    SkIRect clippedIR = ir;
-    clippedIR.intersect(clipBounds);
+    FillPathFunc fillPathFunc = [](const SkPath& path, SkBlitter* blitter, bool isInverse,
+            const SkIRect& ir, const SkIRect& clipBounds, bool containedInClip, bool forceRLE){
+        bool isEvenOdd  = path.getFillType() & 1;
+        bool isConvex   = path.isConvex();
+        bool skipRect   = isConvex && !isInverse;
 
-    // The overhead of even constructing SkCoverageDeltaList/Mask is too big.
-    // So TryBlitFatAntiRect and return if it's successful.
-    if (!isInverse && TryBlitFatAntiRect(blitter, path, clipBounds)) {
-        return;
-    }
+        SkIRect clippedIR = ir;
+        clippedIR.intersect(clipBounds);
+
+        // The overhead of even constructing SkCoverageDeltaList/Mask is too big.
+        // So TryBlitFatAntiRect and return if it's successful.
+        if (!isInverse && TryBlitFatAntiRect(blitter, path, clipBounds)) {
+            return;
+        }
 
 #ifdef GOOGLE3
-    constexpr int STACK_SIZE = 12 << 10; // 12K stack size alloc; Google3 has 16K limit.
+        constexpr int STACK_SIZE = 12 << 10; // 12K stack size alloc; Google3 has 16K limit.
 #else
-    constexpr int STACK_SIZE = 64 << 10; // 64k stack size to avoid heap allocation
+        constexpr int STACK_SIZE = 64 << 10; // 64k stack size to avoid heap allocation
 #endif
-    SkSTArenaAlloc<STACK_SIZE> alloc; // avoid heap allocation with SkSTArenaAlloc
+        SkSTArenaAlloc<STACK_SIZE> alloc; // avoid heap allocation with SkSTArenaAlloc
 
-    // Only blitter->blitXXX needs to be done in order in the threaded backend.
-    // Everything before can be done out of order in the threaded backend.
-    if (!forceRLE && !isInverse && SkCoverageDeltaMask::Suitable(clippedIR)) {
-        SkCoverageDeltaMask deltaMask(&alloc, clippedIR);
-        gen_alpha_deltas(path, clipBounds, deltaMask, blitter, skipRect, containedInClip);
-        deltaMask.convertCoverageToAlpha(isEvenOdd, isInverse, isConvex);
-        blitter->blitMask(deltaMask.prepareSkMask(), clippedIR);
-    } else {
-        SkCoverageDeltaList deltaList(&alloc, clippedIR.fTop, clippedIR.fBottom, forceRLE);
-        gen_alpha_deltas(path, clipBounds, deltaList, blitter, skipRect, containedInClip);
-        blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex);
-    }
+        // Only blitter->blitXXX need to be done in order in the threaded backend.
+        // Everything before can be done out of order in the threaded backend.
+        if (!forceRLE && !isInverse && SkCoverageDeltaMask::Suitable(clippedIR)) {
+            SkCoverageDeltaMask deltaMask(&alloc, clippedIR);
+            gen_alpha_deltas(path, clipBounds, deltaMask, blitter, skipRect, containedInClip);
+            deltaMask.convertCoverageToAlpha(isEvenOdd, isInverse, isConvex);
+            blitter->blitMask(deltaMask.prepareSkMask(), clippedIR);
+        } else {
+            SkCoverageDeltaList deltaList(&alloc, clippedIR.fTop, clippedIR.fBottom, forceRLE);
+            gen_alpha_deltas(path, clipBounds, deltaList, blitter, skipRect, containedInClip);
+            blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex);
+        }
+    };
+
+    // For the threaded backend with out-of-order init-once (and therefore out-of-order
+    // do_fill_path), we probably have to take care of blitRegion, sk_blit_above, and sk_blit_below
+    // in do_fill_path to maintain the draw order. If we do that, be careful that blitRect may
+    // throw an exception if the rect is empty.
+    do_fill_path(path, origClip, blitter, forceRLE, 2, std::move(fillPathFunc));
 }
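
Likewise, the restored DAAFillPath lambda chooses between accumulating coverage deltas in a mask or in a per-row delta list. A hypothetical distillation of that choice (maskSuitable is a placeholder for SkCoverageDeltaMask::Suitable(clippedIR), not Skia API):

    #include <cstdio>

    enum class DAAStorage { kDeltaMask, kDeltaList };

    static DAAStorage choose_daa_storage(bool forceRLE, bool isInverse, bool maskSuitable) {
        if (!forceRLE && !isInverse && maskSuitable) {
            return DAAStorage::kDeltaMask;  // accumulate deltas in one mask, blitMask once
        }
        return DAAStorage::kDeltaList;      // per-row delta list, blitCoverageDeltas
    }

    int main() {
        std::printf("%d\n", (int)choose_daa_storage(false, false, true));  // 0 (mask)
        return 0;
    }
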