Revert "Experimental blur code."

This reverts commit d96ed9d0def2d660f537e4ab5c79e9e66470ee22.

Reason for revert: dm crashes with a segmentation fault in SkMaskBlurFilter::blurOneScan (see stack trace below)

https://luci-milo.appspot.com/swarming/task/374d82c1d1263910/steps/symbolized_dm/0/stdout

Likely culprit:
	unit test  BlurDrawing

Stack trace:
    /mnt/pd0/s/w/ir/out/Debug/dm(+0x2440eb) [0x568770eb]
    linux-gate.so.1(__kernel_sigreturn+0) [0xf7714ca0]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK16SkMaskBlurFilter11blurOneScanENS_10FilterInfoEPKhjS2_PhjS3_+0x236) [0x5766301e]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK16SkMaskBlurFilter4blurERK6SkMaskPS0_+0x285) [0x57663491]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZN10SkBlurMask7BoxBlurEP6SkMaskRKS0_f11SkBlurStyle13SkBlurQualityP8SkIPointb+0x5c) [0x5720d48e]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK20SkBlurMaskFilterImpl10filterMaskEP6SkMaskRKS0_RK8SkMatrixP8SkIPoint+0x67) [0x5720e427]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK20SkBlurMaskFilterImpl17filterRectsToNineEPK6SkRectiRK8SkMatrixRK7SkIRectPN12SkMaskFilter9NinePatchE+0x579) [0x5721a247]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK12SkMaskFilter10filterPathERK6SkPathRK8SkMatrixRK12SkRasterClipP9SkBlitterN11SkStrokeRec9InitStyleE+0xf6) [0x5706c2bc]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK6SkDraw11drawDevPathERK6SkPathRK7SkPaintbP9SkBlitterb+0x1bb) [0x57034f1b]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNK6SkDraw8drawPathERK6SkPathRK7SkPaintPK8SkMatrixbbP9SkBlitter+0x47f) [0x57035dd3]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZN14SkBitmapDevice8drawPathERK6SkPathRK7SkPaintPK8SkMatrixb+0x6e) [0x56f937fc]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZN8SkCanvas10onDrawPathERK6SkPathRK7SkPaint+0x39d) [0x56fb12e5]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZN8SkCanvas8drawPathERK6SkPathRK7SkPaint+0x18) [0x56fad926]
    /mnt/pd0/s/w/ir/out/Debug/dm(+0x3f71b3) [0x56a2a1b3]
    /mnt/pd0/s/w/ir/out/Debug/dm(+0x2450bd) [0x568780bd]
    /mnt/pd0/s/w/ir/out/Debug/dm(+0x2450fb) [0x568780fb]
    /mnt/pd0/s/w/ir/out/Debug/dm(+0xae53e7) [0x571183e7]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZNKSt8functionIFvvEEclEv+0x20) [0x569f98de]
    /mnt/pd0/s/w/ir/out/Debug/dm(_ZN12SkThreadPool4LoopEPv+0x298) [0x57045b2f]
    /mnt/pd0/s/w/ir/out/Debug/dm(+0xbbd92c) [0x571f092c]
    /lib/i386-linux-gnu/libpthread.so.0(+0x627a) [0xf76e827a]
    /lib/i386-linux-gnu/libc.so.6(clone+0x66) [0xf70ecb56]
Segmentation fault

Original change's description:
> Experimental blur code.
> 
> This uses a new method of blurring that runs the three 
> passes of the box filter in a single pass. This implementation
> currently only does 1x1 pixel at a time, but it should be simple
> to expand to 4x4 pixels at a time.
> 
> On the blur_10_normal_high_quality benchmark, the new code is 7% faster
> than the old code. For the blur_100.50_normal_high_quality
> benchmark, the new code is 11% slower.
> 
> Change-Id: Iea37294abc7c27de5ad569adf8bc62df77eafd02
> Reviewed-on: https://skia-review.googlesource.com/21739
> Commit-Queue: Herb Derby <herb@google.com>
> Reviewed-by: Mike Reed <reed@google.com>

TBR=herb@google.com,reed@google.com

Change-Id: I9e896c548d0a4cd3308d6a311c8bd16719a08a85
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/22421
Reviewed-by: Florin Malita <fmalita@google.com>
Commit-Queue: Florin Malita <fmalita@google.com>
diff --git a/gn/core.gni b/gn/core.gni
index f2fd00d..c0eecd9 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -178,8 +178,6 @@
   "$_src/core/SkMD5.h",
   "$_src/core/SkMallocPixelRef.cpp",
   "$_src/core/SkMask.cpp",
-  "$_src/core/SkMaskBlurFilter.h",
-  "$_src/core/SkMaskBlurFilter.cpp",
   "$_src/core/SkMaskCache.cpp",
   "$_src/core/SkMaskFilter.cpp",
   "$_src/core/SkMaskGamma.cpp",
diff --git a/src/core/SkMakeUnique.h b/src/core/SkMakeUnique.h
index 860ea2e..188eb05 100644
--- a/src/core/SkMakeUnique.h
+++ b/src/core/SkMakeUnique.h
@@ -18,11 +18,6 @@
     return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
 }
 
-template<typename T>
-std::unique_ptr<T> make_unique_default(size_t n) {
-    return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]);
-}
-
 }
 
 #endif  // SkMakeUnique_DEFINED
diff --git a/src/core/SkMaskBlurFilter.cpp b/src/core/SkMaskBlurFilter.cpp
deleted file mode 100644
index 8b4c2f4..0000000
--- a/src/core/SkMaskBlurFilter.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkMaskBlurFilter.h"
-
-#include <cmath>
-
-#include "SkMakeUnique.h"
-
-static const double kPi = 3.14159265358979323846264338327950288;
-
-static uint64_t weight_from_diameter(uint32_t d) {
-    uint64_t d2 = d * d;
-    uint64_t d3 = d2 * d;
-    if ((d&1) == 0) {
-        // d * d * (d + 1);
-        return d3 + d2;
-    }
-
-    return d3;
-}
-
-static uint32_t filter_window(double sigma) {
-    auto possibleWindow = static_cast<uint32_t>(floor(sigma * 3 * sqrt(2*kPi)/4 + 0.5));
-    return std::max(1u, possibleWindow);
-}
-
-SkMaskBlurFilter::FilterInfo::FilterInfo(double sigma)
-    : fFilterWindow{filter_window(sigma)}
-    , fScaledWeight{(static_cast<uint64_t>(1) << 32) / weight_from_diameter(fFilterWindow)} {}
-
-uint64_t SkMaskBlurFilter::FilterInfo::weight() const {
-    return weight_from_diameter(fFilterWindow);
-
-}
-uint32_t SkMaskBlurFilter::FilterInfo::borderSize() const {
-    if ((fFilterWindow&1) == 0) {
-        return 3 * (fFilterWindow / 2) - 1;
-    }
-    return 3 * (fFilterWindow / 2);
-}
-
-size_t SkMaskBlurFilter::FilterInfo::diameter(uint8_t pass) const {
-    SkASSERT(pass <= 2);
-
-    if ((fFilterWindow&1) == 0) {
-        // Handle even case.
-        switch (pass) {
-            case 0: return fFilterWindow;
-            case 1: return fFilterWindow;
-            case 2: return fFilterWindow+1;
-        }
-    }
-
-    return fFilterWindow;
-}
-
-uint64_t SkMaskBlurFilter::FilterInfo::scaledWeight() const {
-    return fScaledWeight;
-}
-
-SkMaskBlurFilter::SkMaskBlurFilter(double sigmaW, double sigmaH)
-    : fInfoW{sigmaW}, fInfoH{sigmaH}
-    , fBuffer0{skstd::make_unique_default<uint32_t[]>(bufferSize(0))}
-    , fBuffer1{skstd::make_unique_default<uint32_t[]>(bufferSize(1))}
-    , fBuffer2{skstd::make_unique_default<uint32_t[]>(bufferSize(2))} {
-}
-
-SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const {
-
-    uint64_t weightW = fInfoW.weight();
-    uint64_t weightH = fInfoH.weight();
-
-    size_t borderW = fInfoW.borderSize();
-    size_t borderH = fInfoH.borderSize();
-
-    size_t srcW = src.fBounds.width();
-    size_t srcH = src.fBounds.height();
-
-    size_t dstW = srcW + 2 * borderW;
-    size_t dstH = srcH + 2 * borderH;
-
-    dst->fBounds.set(0, 0, dstW, dstH);
-    dst->fBounds.offset(src.fBounds.x(), src.fBounds.y());
-    dst->fBounds.offset(-SkTo<int32_t>(borderW), -SkTo<int32_t>(borderH));
-
-    dst->fImage = nullptr;
-    dst->fRowBytes = dstW;
-    dst->fFormat = SkMask::kA8_Format;
-
-    if (src.fImage == nullptr) {
-        return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
-    }
-
-    dst->fImage = SkMask::AllocImage(dstW * dstH);
-
-    if (weightW > 1 && weightH > 1) {
-        // Blur both directions.
-        size_t tmpW = srcH;
-        size_t tmpH = dstW;
-        auto tmp = skstd::make_unique_default<uint8_t[]>(tmpW * tmpH);
-
-        // Blur horizontally, and transpose.
-        for (size_t y = 0; y < srcH; y++) {
-            auto srcStart = &src.fImage[y * src.fRowBytes];
-            auto tmpStart = &tmp[y];
-            this->blurOneScan(fInfoW,
-                              srcStart, 1, srcStart + srcW,
-                              tmpStart, tmpW, tmpStart + tmpW * tmpH);
-        }
-
-        // Blur vertically (scan in memory order because of the transposition),
-        // and transpose back to the original orientation.
-        for (size_t y = 0; y < tmpH; y++) {
-            auto tmpStart = &tmp[y * tmpW];
-            auto dstStart = &dst->fImage[y];
-            this->blurOneScan(fInfoH,
-                              tmpStart, 1, tmpStart + tmpW,
-                              dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH);
-        }
-    } else if (weightW > 1) {
-        // Blur only horizontally.
-
-        for (size_t y = 0; y < srcH; y++) {
-            auto srcStart = &src.fImage[y * src.fRowBytes];
-            auto dstStart = &dst->fImage[y * dst->fRowBytes];
-            this->blurOneScan(fInfoW,
-                              srcStart, 1, srcStart + srcW,
-                              dstStart, 1, dstStart + dstW);
-        }
-    } else if (weightH > 1) {
-        // Blur only vertically.
-
-        for (size_t x = 0; x < srcW; x++) {
-            auto srcStart = &src.fImage[x];
-            auto srcEnd   = &src.fImage[src.fRowBytes * srcH];
-            auto dstStart = &dst->fImage[x];
-            auto dstEnd   = &dst->fImage[dst->fRowBytes * dstH];
-            this->blurOneScan(fInfoH,
-                              srcStart, src.fRowBytes, srcEnd,
-                              dstStart, dst->fRowBytes, dstEnd);
-        }
-    } else {
-        // Copy to dst. No Blur.
-
-        for (size_t y = 0; y < srcH; y++) {
-            std::memcpy(&dst->fImage[y * dst->fRowBytes], &src.fImage[y * src.fRowBytes], dstW);
-        }
-    }
-
-    return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)};
-}
-
-size_t SkMaskBlurFilter::bufferSize(uint8_t bufferPass) const {
-    return std::max(fInfoW.diameter(bufferPass), fInfoH.diameter(bufferPass)) - 1;
-}
-
-// Blur one horizontal scan into the dst.
-void SkMaskBlurFilter::blurOneScan(
-    FilterInfo info,
-    const uint8_t* src, size_t srcStride, const uint8_t* srcEnd,
-          uint8_t* dst, size_t dstStride,       uint8_t* dstEnd) const {
-
-    auto buffer0Begin = &fBuffer0[0];
-    auto buffer1Begin = &fBuffer1[0];
-    auto buffer2Begin = &fBuffer2[0];
-
-    auto buffer0Cursor = buffer0Begin;
-    auto buffer1Cursor = buffer1Begin;
-    auto buffer2Cursor = buffer2Begin;
-
-    auto buffer0End = &fBuffer0[0] + info.diameter(0) - 1;
-    auto buffer1End = &fBuffer1[0] + info.diameter(1) - 1;
-    auto buffer2End = &fBuffer2[0] + info.diameter(2) - 1;
-
-    std::memset(&fBuffer0[0], 0, (buffer0End - buffer0Begin) * sizeof(fBuffer0[0]));
-    std::memset(&fBuffer1[0], 0, (buffer1End - buffer1Begin) * sizeof(fBuffer1[0]));
-    std::memset(&fBuffer2[0], 0, (buffer2End - buffer2Begin) * sizeof(fBuffer2[0]));
-
-    uint32_t sum0 = 0;
-    uint32_t sum1 = 0;
-    uint32_t sum2 = 0;
-
-    const uint64_t half = static_cast<uint64_t>(1) << 31;
-
-    // Consume the source generating pixels.
-    for (auto srcCursor = src; srcCursor < srcEnd; dst += dstStride, srcCursor += srcStride) {
-        uint32_t s = *srcCursor;
-        sum0 += s;
-        sum1 += sum0;
-        sum2 += sum1;
-
-        *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32);
-
-        sum2 -= *buffer2Cursor;
-        *buffer2Cursor = sum1;
-        buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0];
-
-        sum1 -= *buffer1Cursor;
-        *buffer1Cursor = sum0;
-        buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0];
-
-        sum0 -= *buffer0Cursor;
-        *buffer0Cursor = s;
-        buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0];
-    }
-
-    // This handles the case when both ends of the box are not between [src, srcEnd), and both
-    // are zero at that point.
-    for (int64_t i = 0; i < 2 * info.borderSize() - (srcEnd - src); i++) {
-        uint32_t s = 0;
-        sum0 += s;
-        sum1 += sum0;
-        sum2 += sum1;
-
-        *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32);
-
-        sum2 -= *buffer2Cursor;
-        *buffer2Cursor = sum1;
-        buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0];
-
-        sum1 -= *buffer1Cursor;
-        *buffer1Cursor = sum0;
-        buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0];
-
-        sum0 -= *buffer0Cursor;
-        *buffer0Cursor = s;
-        buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0];
-        dst += dstStride;
-    }
-
-    // Starting from the right, fill in the rest of the buffer.
-    std::memset(&fBuffer0[0], 0, (buffer0End - &fBuffer0[0]) * sizeof(fBuffer0[0]));
-    std::memset(&fBuffer1[0], 0, (buffer1End - &fBuffer1[0]) * sizeof(fBuffer1[0]));
-    std::memset(&fBuffer2[0], 0, (buffer2End - &fBuffer2[0]) * sizeof(fBuffer2[0]));
-
-    sum0 = sum1 = sum2 = 0;
-
-    uint8_t* dstCursor = dstEnd;
-    const uint8_t* srcCursor = srcEnd;
-    do {
-        dstCursor -= dstStride;
-        srcCursor -= srcStride;
-        uint32_t s = *srcCursor;
-        sum0 += s;
-        sum1 += sum0;
-        sum2 += sum1;
-
-        *dstCursor = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32);
-
-        sum2 -= *buffer2Cursor;
-        *buffer2Cursor = sum1;
-        buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0];
-
-        sum1 -= *buffer1Cursor;
-        *buffer1Cursor = sum0;
-        buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0];
-
-        sum0 -= *buffer0Cursor;
-        *buffer0Cursor = s;
-        buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0];
-    } while (dstCursor > dst);
-}
diff --git a/src/core/SkMaskBlurFilter.h b/src/core/SkMaskBlurFilter.h
deleted file mode 100644
index 9becadc..0000000
--- a/src/core/SkMaskBlurFilter.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkBlurMaskFilter_DEFINED
-#define SkBlurMaskFilter_DEFINED
-
-#include <algorithm>
-#include <memory>
-
-#include "SkMask.h"
-#include "SkTypes.h"
-
-// Implement a single channel Gaussian blur. The specifics for implementation are taken from:
-// https://drafts.fxtf.org/filters/#feGaussianBlurElement
-class SkMaskBlurFilter {
-public:
-    // Given a filter specified by sigma, generate various quantities.
-    class FilterInfo {
-    public:
-        explicit FilterInfo(double sigma);
-
-        // The final weight to divide by given a box size calculated from sigma accumulated for
-        // all three passes. For example, if the box size is 5, then the final weight for all
-        // three passes is 5^3 or 125.
-        uint64_t weight() const;
-
-        // The distance between the first value of the dst and the first value of the src.
-        uint32_t borderSize() const;
-
-        // The size of the box filter.
-        size_t   diameter(uint8_t) const;
-
-        // A factor used to simulate division using multiplication and shift.
-        uint64_t scaledWeight() const;
-
-    private:
-        const uint32_t fFilterWindow;
-        const uint64_t fScaledWeight;
-    };
-
-    // Create an object suitable for filtering an SkMask using a filter with width sigmaW and
-    // height sigmaH.
-    SkMaskBlurFilter(double sigmaW, double sigmaH);
-
-    // Given a src SkMask, generate dst SkMask returning the border width and height.
-    SkIPoint blur(const SkMask& src, SkMask* dst) const;
-
-private:
-    size_t bufferSize(uint8_t bufferPass) const;
-
-    void blurOneScan(FilterInfo gen,
-                     const uint8_t* src, size_t srcStride, const uint8_t* srcEnd,
-                           uint8_t* dst, size_t dstStride,       uint8_t* dstEnd) const;
-
-
-    const FilterInfo            fInfoW,
-                                fInfoH;
-    std::unique_ptr<uint32_t[]> fBuffer0,
-                                fBuffer1,
-                                fBuffer2;
-};
-
-#endif  // SkBlurMaskFilter_DEFINED
diff --git a/src/effects/SkBlurMask.cpp b/src/effects/SkBlurMask.cpp
index fe59ab8..eee1631 100644
--- a/src/effects/SkBlurMask.cpp
+++ b/src/effects/SkBlurMask.cpp
@@ -7,7 +7,6 @@
 
 
 #include "SkBlurMask.h"
-#include "SkMaskBlurFilter.h"
 #include "SkMath.h"
 #include "SkTemplates.h"
 #include "SkEndian.h"
@@ -407,7 +406,13 @@
     return new_width;
 }
 
-
+static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
+{
+    *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
+    if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
+        *loRadius = *hiRadius - 1;
+    }
+}
 
 #include "SkColorPriv.h"
 
@@ -482,17 +487,6 @@
         return false;
     }
 
-    SkIPoint border;
-
-#ifdef SK_SUPPORT_LEGACY_MASK_BLUR
-
-    auto get_adjusted_radii = [](SkScalar passRadius, int *loRadius, int *hiRadius) {
-        *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
-        if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
-            *loRadius = *hiRadius - 1;
-        }
-    };
-
     // Force high quality off for small radii (performance)
     if (!force_quality && sigma <= SkIntToScalar(2)) {
         quality = kLow_SkBlurQuality;
@@ -502,12 +496,12 @@
     if (kHigh_SkBlurQuality == quality) {
         // For the high quality path the 3 pass box blur kernel width is
         // 6*rad+1 while the full Gaussian width is 6*sigma.
-        passRadius = sigma - (1 / 6.0f);
+        passRadius = sigma - (1/6.0f);
     } else {
         // For the low quality path we only attempt to cover 3*sigma of the
         // Gaussian blur area (1.5*sigma on each side). The single pass box
         // blur's kernel size is 2*rad+1.
-        passRadius = 1.5f * sigma - 0.5f;
+        passRadius = 1.5f*sigma - 0.5f;
     }
 
     // highQuality: use three box blur passes as a cheap way
@@ -528,8 +522,9 @@
     int padx = passCount * rx;
     int pady = passCount * ry;
 
-    border = {padx, pady};
-
+    if (margin) {
+        margin->set(padx, pady);
+    }
     dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
                      src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
 
@@ -543,15 +538,15 @@
             return false;   // too big to allocate, abort
         }
 
-        int sw = src.fBounds.width();
-        int sh = src.fBounds.height();
-        const uint8_t* sp = src.fImage;
-        uint8_t* dp = SkMask::AllocImage(dstSize);
+        int             sw = src.fBounds.width();
+        int             sh = src.fBounds.height();
+        const uint8_t*  sp = src.fImage;
+        uint8_t*        dp = SkMask::AllocImage(dstSize);
         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
 
         // build the blurry destination
-        SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
-        uint8_t* tp = tmpBuffer.get();
+        SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
+        uint8_t*                tp = tmpBuffer.get();
         int w = sw, h = sh;
 
         if (outerWeight == 255) {
@@ -560,40 +555,33 @@
             if (kHigh_SkBlurQuality == quality) {
                 // Do three X blurs, with a transpose on the final one.
                 w = boxBlur<false>(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h);
-                w = boxBlur<false>(tp, w, dp, hiRadius, loRadius, w, h);
-                w = boxBlur<true>(dp, w, tp, hiRadius, hiRadius, w, h);
+                w = boxBlur<false>(tp, w,             dp, hiRadius, loRadius, w, h);
+                w = boxBlur<true>(dp, w,             tp, hiRadius, hiRadius, w, h);
                 // Do three Y blurs, with a transpose on the final one.
-                h = boxBlur<false>(tp, h, dp, loRadius, hiRadius, h, w);
-                h = boxBlur<false>(dp, h, tp, hiRadius, loRadius, h, w);
-                h = boxBlur<true>(tp, h, dp, hiRadius, hiRadius, h, w);
+                h = boxBlur<false>(tp, h,             dp, loRadius, hiRadius, h, w);
+                h = boxBlur<false>(dp, h,             tp, hiRadius, loRadius, h, w);
+                h = boxBlur<true>(tp, h,             dp, hiRadius, hiRadius, h, w);
             } else {
                 w = boxBlur<true>(sp, src.fRowBytes, tp, rx, rx, w, h);
-                h = boxBlur<true>(tp, h, dp, ry, ry, h, w);
+                h = boxBlur<true>(tp, h,             dp, ry, ry, h, w);
             }
         } else {
             if (kHigh_SkBlurQuality == quality) {
                 // Do three X blurs, with a transpose on the final one.
                 w = boxBlurInterp<false>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
-                w = boxBlurInterp<false>(tp, w, dp, rx, w, h, outerWeight);
-                w = boxBlurInterp<true>(dp, w, tp, rx, w, h, outerWeight);
+                w = boxBlurInterp<false>(tp, w,             dp, rx, w, h, outerWeight);
+                w = boxBlurInterp<true>(dp, w,             tp, rx, w, h, outerWeight);
                 // Do three Y blurs, with a transpose on the final one.
-                h = boxBlurInterp<false>(tp, h, dp, ry, h, w, outerWeight);
-                h = boxBlurInterp<false>(dp, h, tp, ry, h, w, outerWeight);
-                h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
+                h = boxBlurInterp<false>(tp, h,             dp, ry, h, w, outerWeight);
+                h = boxBlurInterp<false>(dp, h,             tp, ry, h, w, outerWeight);
+                h = boxBlurInterp<true>(tp, h,             dp, ry, h, w, outerWeight);
             } else {
                 w = boxBlurInterp<true>(sp, src.fRowBytes, tp, rx, w, h, outerWeight);
-                h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight);
+                h = boxBlurInterp<true>(tp, h,             dp, ry, h, w, outerWeight);
             }
         }
 
-        dst->fImage = autoCall.release();
-    }
-#else
-    SkMaskBlurFilter blurFilter{sigma, sigma};
-    border = blurFilter.blur(src, dst);
-#endif  // SK_SUPPORT_LEGACY_MASK_BLUR
-
-    if (src.fImage != nullptr) {
+        dst->fImage = dp;
         // if need be, alloc the "real" dst (same size as src) and copy/merge
         // the blur into it (applying the src)
         if (style == kInner_SkBlurStyle) {
@@ -602,21 +590,17 @@
             if (0 == srcSize) {
                 return false;   // too big to allocate, abort
             }
-            auto blur = dst->fImage;
             dst->fImage = SkMask::AllocImage(srcSize);
-            auto blurStart = &blur[border.x() + border.y() * dst->fRowBytes];
             merge_src_with_blur(dst->fImage, src.fRowBytes,
-                                src.fImage, src.fRowBytes,
-                                blurStart,
-                                dst->fRowBytes,
-                                src.fBounds.width(), src.fBounds.height());
-            SkMask::FreeImage(blur);
+                                sp, src.fRowBytes,
+                                dp + passCount * (rx + ry * dst->fRowBytes),
+                                dst->fRowBytes, sw, sh);
+            SkMask::FreeImage(dp);
         } else if (style != kNormal_SkBlurStyle) {
-            auto dstStart = &dst->fImage[border.x() + border.y() * dst->fRowBytes];
-            clamp_with_orig(dstStart,
-                            dst->fRowBytes, src.fImage, src.fRowBytes,
-                            src.fBounds.width(), src.fBounds.height(), style);
+            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
+                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
         }
+        (void)autoCall.release();
     }
 
     if (style == kInner_SkBlurStyle) {
@@ -624,10 +608,6 @@
         dst->fRowBytes = src.fRowBytes;
     }
 
-    if (margin != nullptr) {
-        *margin = border;
-    }
-
     return true;
 }