SkRasterPipeline: implement SkLumaColorFilter

After getting discouraged by the non-separable xfermodes, I decided to look at filling out the color filters instead.  This one's nice and easy.

There's only 1 GM that exercises this color filter, and it's drawing noticeably lighter now in f16 and sRGB configs.  565 is unchanged.  This makes me think the diffs are due to lost precision in the previous method, which was going through the default fallback to 8888 filterSpan().

I double checked: the f16 config now draws nearly identically to the gpuf16 config.  It used to be quite different.

BUG=skia:

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4183
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: Ic6feaecae5cf18493b5df89733f6a5ca362e9a75
Reviewed-on: https://skia-review.googlesource.com/4183
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/include/effects/SkLumaColorFilter.h b/include/effects/SkLumaColorFilter.h
index 9625435..a55e37a 100644
--- a/include/effects/SkLumaColorFilter.h
+++ b/include/effects/SkLumaColorFilter.h
@@ -11,6 +11,8 @@
 #include "SkColorFilter.h"
 #include "SkRefCnt.h"
 
+class SkRasterPipeline;
+
 /**
  *  Luminance-to-alpha color filter, as defined in
  *  http://www.w3.org/TR/SVG/masking.html#Masking
@@ -40,6 +42,7 @@
 
 private:
     SkLumaColorFilter();
+    bool onAppendStages(SkRasterPipeline*) const override;
 
     typedef SkColorFilter INHERITED;
 };
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index ccfdeb5..6d8fef2 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -54,19 +54,20 @@
 // TODO: There may be a better place to stuff tail, e.g. in the bottom alignment bits of
 // the Stage*.  This mostly matters on 64-bit Windows where every register is precious.
 
-#define SK_RASTER_PIPELINE_STAGES(M)                            \
-    M(swap_src_dst) M(constant_color) M(clamp_1)                \
-    M(load_s_565)  M(load_d_565)  M(store_565)                  \
-    M(load_s_srgb) M(load_d_srgb) M(store_srgb)                 \
-    M(load_s_f16)  M(load_d_f16)  M(store_f16)                  \
-    M(scale_u8)                                                 \
-    M(lerp_u8) M(lerp_565) M(lerp_constant_float)               \
-    M(dst)                                                      \
-    M(dstatop) M(dstin) M(dstout) M(dstover)                    \
-    M(srcatop) M(srcin) M(srcout) M(srcover)                    \
-    M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_) \
-    M(colorburn) M(colordodge) M(darken) M(difference)          \
-    M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight)
+#define SK_RASTER_PIPELINE_STAGES(M)                             \
+    M(swap_src_dst) M(constant_color) M(clamp_1)                 \
+    M(load_s_565)  M(load_d_565)  M(store_565)                   \
+    M(load_s_srgb) M(load_d_srgb) M(store_srgb)                  \
+    M(load_s_f16)  M(load_d_f16)  M(store_f16)                   \
+    M(scale_u8)                                                  \
+    M(lerp_u8) M(lerp_565) M(lerp_constant_float)                \
+    M(dst)                                                       \
+    M(dstatop) M(dstin) M(dstout) M(dstover)                     \
+    M(srcatop) M(srcin) M(srcout) M(srcover)                     \
+    M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_)  \
+    M(colorburn) M(colordodge) M(darken) M(difference)           \
+    M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
+    M(luminance_to_alpha)  // New stage; appended by SkLumaColorFilter::onAppendStages().
 
 class SkRasterPipeline {
 public:
diff --git a/src/effects/SkLumaColorFilter.cpp b/src/effects/SkLumaColorFilter.cpp
index 19b2c72..e3c4f37 100644
--- a/src/effects/SkLumaColorFilter.cpp
+++ b/src/effects/SkLumaColorFilter.cpp
@@ -8,6 +8,7 @@
 #include "SkLumaColorFilter.h"
 
 #include "SkColorPriv.h"
+#include "SkRasterPipeline.h"
 #include "SkString.h"
 
 #if SK_SUPPORT_GPU
@@ -37,6 +38,11 @@
     }
 }
 
+bool SkLumaColorFilter::onAppendStages(SkRasterPipeline* p) const {  // Expresses this color filter as raster pipeline stages on p.
+    p->append(SkRasterPipeline::luminance_to_alpha);  // The whole filter is this one stage; only the stage id is passed.
+    return true;  // Unconditional; presumably signals the caller that no fallback path is needed (confirm against SkColorFilter).
+}
+
 sk_sp<SkColorFilter> SkLumaColorFilter::Make() {
     return sk_sp<SkColorFilter>(new SkLumaColorFilter);
 }
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index b22c9fc..845fe40 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -8,6 +8,7 @@
 #ifndef SkRasterPipeline_opts_DEFINED
 #define SkRasterPipeline_opts_DEFINED
 
+#include "SkColorPriv.h"
 #include "SkHalf.h"
 #include "SkPM4f.h"
 #include "SkRasterPipeline.h"
@@ -444,6 +445,11 @@
     return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc);  // 1 or (2 or 3)?
 }
 
+STAGE(luminance_to_alpha, true) {  // Luminance-to-alpha: replaces the color with its luminance stored in alpha.
+    a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;  // alpha = weighted sum of r, g, b; the incoming alpha is overwritten.
+    r = g = b = 0;  // All three color channels are cleared.
+}
+
 
 template <typename Fn>
 SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {