Convert A8 D32 mask blitters to Sk4px

Improves the newly added bench by ~25% (hsw):

-- before --

    micros   	bench
   2298.34  	shadermaskfilter_picture_80	8888
   2339.60  	shadermaskfilter_picture_ff	8888
   2287.11  	shadermaskfilter_bitmap_80	8888
   2223.14  	shadermaskfilter_bitmap_ff	8888

-- after --

   1693.36  	shadermaskfilter_picture_80	8888
   1637.45  	shadermaskfilter_picture_ff	8888
   1691.65  	shadermaskfilter_bitmap_80	8888
   1637.70  	shadermaskfilter_bitmap_ff	8888

Bug: skia:7810
Change-Id: I7274b10f517551ee2c0646842f72e0372d55e509
Reviewed-on: https://skia-review.googlesource.com/121642
Commit-Queue: Florin Malita <fmalita@chromium.org>
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/bench/ShaderMaskFilterBench.cpp b/bench/ShaderMaskFilterBench.cpp
new file mode 100644
index 0000000..4a2ec14
--- /dev/null
+++ b/bench/ShaderMaskFilterBench.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Benchmark.h"
+#include "SkCanvas.h"
+#include "SkPaint.h"
+#include "SkPictureRecorder.h"
+#include "SkPictureShader.h"
+#include "SkShaderMaskFilter.h"
+#include "SkSurface.h"
+
+static sk_sp<SkShader> make_bitmap_shader() {  // Image-backed shader for the mask filter bench.
+    SkPaint p;
+    p.setColor(SK_ColorBLACK);
+    p.setAntiAlias(true);
+    // Rasterize an anti-aliased black circle (r=50) into a 100x100 N32 premul surface.
+    auto surface = SkSurface::MakeRasterN32Premul(100, 100);
+    surface->getCanvas()->drawCircle(50, 50, 50, p);
+    // Snapshot the surface as an image and expose it as a shader that repeats in x and y.
+    return surface->makeImageSnapshot()->makeShader(SkShader::kRepeat_TileMode,
+                                                    SkShader::kRepeat_TileMode);
+}
+
+static sk_sp<SkShader> make_picture_shader() {  // Picture-backed shader for the mask filter bench.
+    SkPaint p;
+    p.setColor(SK_ColorBLACK);
+    p.setAntiAlias(true);
+    // Record an anti-aliased black circle (r=50) into a 100x100 picture.
+    SkPictureRecorder recorder;
+    recorder.beginRecording(100, 100)->drawCircle(50, 50, 50, p);
+    // Wrap the finished picture in a shader that repeats in x and y.
+    return SkPictureShader::Make(recorder.finishRecordingAsPicture(),
+                                 SkShader::kRepeat_TileMode,
+                                 SkShader::kRepeat_TileMode,
+                                 nullptr, nullptr);  // presumably local matrix, tile rect; confirm
+}
+
+class ShaderMFBench final : public Benchmark {
+    // Benchmarks filling a saveLayer whose paint carries a shader-based mask filter.
+public:
+    using ShaderMaker = sk_sp<SkShader>(*)();
+    // nm tags the bench name; opaque picks alpha 0xff vs 0x80; maker builds the mask shader.
+    ShaderMFBench(const char* nm, bool opaque, const ShaderMaker& maker) {
+        fMaskFilter = SkShaderMaskFilter::Make(maker());
+        fColor = opaque ? 0xff00ff00 : 0x8000ff00;  // differ only in the top (alpha) byte
+        fName = SkStringPrintf("shadermaskfilter_%s_%x", nm, SkColorGetA(fColor));
+    }
+
+protected:
+    const char* onGetName() override {
+        return fName.c_str();
+    }
+
+    void onDraw(int loops, SkCanvas* canvas) override {
+        SkPaint maskPaint;
+        maskPaint.setMaskFilter(fMaskFilter);
+        // Per iteration: push a layer with the mask-filter paint, then fill it with fColor.
+        for (int i = 0; i < loops; ++i) {
+            SkAutoCanvasRestore arc(canvas, false);  // presumably restores at scope exit; confirm
+            canvas->saveLayer(nullptr, &maskPaint);
+            canvas->drawColor(fColor);
+        }
+    }
+
+private:
+    SkString            fName;        // returned by onGetName()
+    sk_sp<SkMaskFilter> fMaskFilter;  // built once in the constructor, reused by every onDraw()
+    SkColor  fColor;                  // color drawn into the layer on each iteration
+
+    using INHERITED = Benchmark;
+};
+
+DEF_BENCH( return new ShaderMFBench("bitmap" , true , make_bitmap_shader ); )  // alpha 0xff
+DEF_BENCH( return new ShaderMFBench("bitmap" , false, make_bitmap_shader ); )  // alpha 0x80
+DEF_BENCH( return new ShaderMFBench("picture", true , make_picture_shader); )  // alpha 0xff
+DEF_BENCH( return new ShaderMFBench("picture", false, make_picture_shader); )  // alpha 0x80
diff --git a/gn/bench.gni b/gn/bench.gni
index e70916c..598e430 100644
--- a/gn/bench.gni
+++ b/gn/bench.gni
@@ -101,6 +101,7 @@
   "$_bench/RTreeBench.cpp",
   "$_bench/ScalarBench.cpp",
   "$_bench/ShaderMaskBench.cpp",
+  "$_bench/ShaderMaskFilterBench.cpp",
   "$_bench/ShadowBench.cpp",
   "$_bench/ShapesBench.cpp",
   "$_bench/Sk4fBench.cpp",
diff --git a/gn/tests.gni b/gn/tests.gni
index 0bbb6da..d18758c 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -21,6 +21,7 @@
   "$_tests/BitSetTest.cpp",
   "$_tests/BlendTest.cpp",
   "$_tests/BlitMaskClip.cpp",
+  "$_tests/BlitMaskTest.cpp",
   "$_tests/BlurTest.cpp",
   "$_tests/CachedDataTest.cpp",
   "$_tests/CachedDecodingPixelRefTest.cpp",
diff --git a/src/core/SkBlitMask_D32.cpp b/src/core/SkBlitMask_D32.cpp
index 1759ed6..4e88d33 100644
--- a/src/core/SkBlitMask_D32.cpp
+++ b/src/core/SkBlitMask_D32.cpp
@@ -5,6 +5,7 @@
  * found in the LICENSE file.
  */
 
+#include "Sk4px.h"
 #include "SkBlitMask.h"
 #include "SkColor.h"
 #include "SkColorData.h"
@@ -78,11 +79,20 @@
 static void A8_RowProc_Blend(
         SkPMColor* SK_RESTRICT dst, const void* maskIn, const SkPMColor* SK_RESTRICT src, int count) {
     const uint8_t* SK_RESTRICT mask = static_cast<const uint8_t*>(maskIn);
+    // Default path (legacy flag undefined) blends via Sk4px: s*aa + d*inv(alpha(s*aa)).
+#ifndef SK_SUPPORT_LEGACY_A8_MASKBLITTER
+    Sk4px::MapDstSrcAlpha(count, dst, src, mask,
+        [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
+            const auto s_aa = s.approxMulDiv255(aa);  // src scaled by coverage
+            return s_aa + d.approxMulDiv255(s_aa.alphas().inv());  // inv(): presumably 255-x
+        });
+#else
     for (int i = 0; i < count; ++i) {
         if (mask[i]) {
             dst[i] = SkBlendARGB32(src[i], dst[i], mask[i]);
         }
     }
+#endif // SK_SUPPORT_LEGACY_A8_MASKBLITTER
 }
 
 // expand the steps that SkAlphaMulQ performs, but this way we can
@@ -97,6 +107,13 @@
 static void A8_RowProc_Opaque(
         SkPMColor* SK_RESTRICT dst, const void* maskIn, const SkPMColor* SK_RESTRICT src, int count) {
     const uint8_t* SK_RESTRICT mask = static_cast<const uint8_t*>(maskIn);
+
+#ifndef SK_SUPPORT_LEGACY_A8_MASKBLITTER
+    Sk4px::MapDstSrcAlpha(count, dst, src, mask,
+        [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
+            return (s * aa + d * aa.inv()).div255();
+        });
+#else
     for (int i = 0; i < count; ++i) {
         int m = mask[i];
         if (m) {
@@ -117,6 +134,7 @@
 #endif
         }
     }
+#endif // SK_SUPPORT_LEGACY_A8_MASKBLITTER
 }
 
 static int upscale31To255(int value) {
diff --git a/tests/BlitMaskTest.cpp b/tests/BlitMaskTest.cpp
new file mode 100644
index 0000000..04bec66
--- /dev/null
+++ b/tests/BlitMaskTest.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkBlitMask.h"
+#include "SkColorPriv.h"
+#include "SkMask.h"
+#include "Test.h"
+
+static void test_opaque_dest(skiatest::Reporter* reporter, SkMask::Format format) {
+    const auto& row_proc = SkBlitMask::RowFactory(SkColorType::kN32_SkColorType, format,
+                                                  static_cast<SkBlitMask::RowFlags>(0));
+    // One 256-pixel row per pass: pixel i is blended with coverage value i.
+    SkPMColor src[256],
+              dst[256];
+    uint8_t    aa[256];
+
+    // Coverage -> [0..255], one distinct value per pixel
+    for (size_t i = 0; i < 256; ++i) {
+        aa[i] = static_cast<uint8_t>(i);
+    }
+
+    // src -> every byte of every pixel set to src_a, for src_a in [0..255]
+    for (size_t src_a = 0; src_a < 256; ++src_a) {
+        memset(src, src_a, sizeof(src));
+
+        // dst -> 0xff in every byte (always opaque), reset before each pass
+        memset(dst, 0xff, sizeof(dst));
+
+        row_proc(dst, aa, src, 256);
+        // Whatever the blend produced, each dst pixel's alpha must still read 0xff.
+        for (size_t i = 0; i < 256; ++i) {
+            REPORTER_ASSERT(reporter, SkGetPackedA32(dst[i]) == 0xff);
+        }
+    }
+}
+
+// Verifies that an opaque D32 dest stays opaque for any (src_alpha, coverage) pair (A8 masks).
+DEF_TEST(BlitMask_OpaqueD32, reporter) {
+    test_opaque_dest(reporter, SkMask::Format::kA8_Format);
+}