SkBlitter is not thread safe; make one for each thread.

Otherwise, GM fancy_gradients would be drawn incorrectly and TSAN
will issue alerts as SkARGB32_Shader_Blitter has its own memory
that may be written during blitting.

As we make one blitter for each thread, we also don't need to
send in a thread-alloc for blitCoverageDeltas

Bug: skia:
Change-Id: Ie4ee0886b88c797ab57c65674b0b7527501b164f
Reviewed-on: https://skia-review.googlesource.com/120641
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Yuqian Li <liyuqian@google.com>
diff --git a/src/core/SkAutoBlitterChoose.h b/src/core/SkAutoBlitterChoose.h
index 3698598..3c2e8af 100644
--- a/src/core/SkAutoBlitterChoose.h
+++ b/src/core/SkAutoBlitterChoose.h
@@ -28,10 +28,11 @@
     SkBlitter*  operator->() { return fBlitter; }
     SkBlitter*  get() const { return fBlitter; }
 
-    void choose(const SkPixmap& dst, const SkMatrix& matrix,
+    SkBlitter* choose(const SkPixmap& dst, const SkMatrix& matrix,
                 const SkPaint& paint, bool drawCoverage = false) {
         SkASSERT(!fBlitter);
         fBlitter = SkBlitter::Choose(dst, matrix, paint, &fAlloc, drawCoverage);
+        return fBlitter;
     }
 
 private:
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 18b9d83..df67ffb 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -81,13 +81,11 @@
 }
 
 void SkBlitter::blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
-                                   bool isEvenOdd, bool isInverse, bool isConvex,
-                                   SkArenaAlloc* alloc) {
-    // We cannot use blitter to allocate the storage because the same blitter might be used across
-    // many threads.
-    int      runSize    = clip.width() + 1; // +1 so we can set runs[clip.width()] = 0
-    int16_t* runs       = alloc->makeArrayDefault<int16_t>(runSize);
-    SkAlpha* alphas     = alloc->makeArrayDefault<SkAlpha>(runSize);
+                                   bool isEvenOdd, bool isInverse, bool isConvex) {
+    int         runSize = clip.width() + 1; // +1 so we can set runs[clip.width()] = 0
+    void*       storage = this->allocBlitMemory(runSize * (sizeof(int16_t) + sizeof(SkAlpha)));
+    int16_t*    runs    = reinterpret_cast<int16_t*>(storage);
+    SkAlpha*    alphas  = reinterpret_cast<SkAlpha*>(runs + runSize);
     runs[clip.width()]  = 0; // we must set the last run to 0 so blitAntiH can stop there
 
     bool canUseMask = !deltas->forceRLE() &&
diff --git a/src/core/SkBlitter.h b/src/core/SkBlitter.h
index 357eb46..c280ac3 100644
--- a/src/core/SkBlitter.h
+++ b/src/core/SkBlitter.h
@@ -36,8 +36,7 @@
     // For example, one may avoid some virtual blitAntiH calls by directly calling
     // SkBlitRow::Color32.
     virtual void blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
-                                    bool isEvenOdd, bool isInverse, bool isConvex,
-                                    SkArenaAlloc* alloc);
+                                    bool isEvenOdd, bool isInverse, bool isConvex);
 
     /// Blit a horizontal run of one or more pixels.
     virtual void blitH(int x, int y, int width) = 0;
diff --git a/src/core/SkDraw.cpp b/src/core/SkDraw.cpp
index 5c5247c..f205080 100644
--- a/src/core/SkDraw.cpp
+++ b/src/core/SkDraw.cpp
@@ -953,15 +953,8 @@
                          SkBlitter* customBlitter, bool doFill, SkInitOnceData* iData) const {
     SkBlitter* blitter = nullptr;
     SkAutoBlitterChoose blitterStorage;
-    SkAutoBlitterChoose* blitterStoragePtr = &blitterStorage;
-    if (iData) {
-        // we're in the threaded init-once phase; the blitter has to be allocated in the thread
-        // allocator so it will remain valid later during the draw phase.
-        blitterStoragePtr = iData->fAlloc->make<SkAutoBlitterChoose>();
-    }
     if (nullptr == customBlitter) {
-        blitterStoragePtr->choose(fDst, *fMatrix, paint, drawCoverage);
-        blitter = blitterStoragePtr->get();
+        blitter = blitterStorage.choose(fDst, *fMatrix, paint, drawCoverage);
     } else {
         blitter = customBlitter;
     }
@@ -1025,20 +1018,27 @@
         // remaining work to draw phase. This is a simple example of how to add init-once to
         // existing drawXXX commands: simply send in SkInitOnceData, do as much init work as
         // possible, and finally wrap the remaining work into iData->fElement->fDrawFn.
-        iData->fElement->setDrawFn([proc, devPath, blitter](SkArenaAlloc* alloc,
+        SkASSERT(customBlitter == nullptr);
+        iData->fElement->setDrawFn([proc, devPath, paint, drawCoverage](SkArenaAlloc* alloc,
                 const SkThreadedBMPDevice::DrawState& ds, const SkIRect& tileBounds) {
             SkThreadedBMPDevice::TileDraw tileDraw(ds, tileBounds);
-            proc(devPath, *tileDraw.fRC, blitter);
+            SkAutoBlitterChoose blitterStorage;
+            proc(devPath, *tileDraw.fRC, blitterStorage.choose(tileDraw.fDst, *tileDraw.fMatrix,
+                                                               paint, drawCoverage));
         });
     } else {
         // We can use DAA to do scan conversion in the init-once phase.
         SkDAARecord* record = iData->fAlloc->make<SkDAARecord>(iData->fAlloc);
         SkNullBlitter nullBlitter; // We don't want to blit anything during the init phase
         SkScan::AntiFillPath(devPath, *fRC, &nullBlitter, record);
-        iData->fElement->setDrawFn([record, devPath, blitter](SkArenaAlloc* alloc,
+        SkASSERT(customBlitter == nullptr);
+        iData->fElement->setDrawFn([record, devPath, paint, drawCoverage](SkArenaAlloc* alloc,
                     const SkThreadedBMPDevice::DrawState& ds, const SkIRect& tileBounds) {
             SkASSERT(record->fType != SkDAARecord::Type::kToBeComputed);
             SkThreadedBMPDevice::TileDraw tileDraw(ds, tileBounds);
+            SkAutoBlitterChoose blitterStorage;
+            SkBlitter* blitter = blitterStorage.choose(tileDraw.fDst, *tileDraw.fMatrix,
+                                                       paint, drawCoverage);
             SkScan::AntiFillPath(devPath, *tileDraw.fRC, blitter, record);
         });
     }
diff --git a/src/core/SkScan_DAAPath.cpp b/src/core/SkScan_DAAPath.cpp
index cce759d..a0d656f 100644
--- a/src/core/SkScan_DAAPath.cpp
+++ b/src/core/SkScan_DAAPath.cpp
@@ -379,8 +379,7 @@
         if (record->fType == SkDAARecord::Type::kMask) {
             blitter->blitMask(record->fMask, clippedIR);
         } else {
-            blitter->blitCoverageDeltas(record->fList,
-                                        clipBounds, isEvenOdd, isInverse, isConvex, alloc);
+            blitter->blitCoverageDeltas(record->fList, clipBounds, isEvenOdd, isInverse, isConvex);
         }
     }
 }
diff --git a/src/core/SkThreadedBMPDevice.cpp b/src/core/SkThreadedBMPDevice.cpp
index 0c30237..1c15bff 100644
--- a/src/core/SkThreadedBMPDevice.cpp
+++ b/src/core/SkThreadedBMPDevice.cpp
@@ -149,7 +149,8 @@
         const SkMatrix* prePathMatrix, bool pathIsMutable) {
     SkRect drawBounds = path.isInverseFillType() ? SkRectPriv::MakeLargest()
                                                  : get_fast_bounds(path.getBounds(), paint);
-    if (path.countVerbs() < 4) { // when path is small, init-once has too much overhead
+    // when path is small, init-once has too much overhead; init-once also can't handle mask filter
+    if (path.countVerbs() < 4 || paint.getMaskFilter()) {
         fQueue.push(drawBounds, [=](SkArenaAlloc*, const DrawState& ds, const SkIRect& tileBounds) {
             TileDraw(ds, tileBounds).drawPath(path, paint, prePathMatrix, false);
         });