Make all SkRasterPipeline stages stock stages in SkOpts.

If we want to support VEX-encoded instructions (AVX, F16C, etc.) without a ridiculous slowdown, we need to make sure a pipeline runs either all VEX-encoded instructions or all non-VEX-encoded instructions; switching between the two encodings mid-pipeline incurs the x86 SSE/AVX state-transition penalty.  That means we cannot mix arbitrary user-defined SkRasterPipeline::Fn stages (never VEX) with the stock stages living in SkOpts (maybe VEX): every stage has to come from SkOpts.

This ports the existing user-defined SkRasterPipeline::Fn use cases over to stock stages from SkOpts.  I rewrote the unit test to use stock stages, and moved the SkXfermode implementations to SkOpts.  The SkArithmeticMode_scalar code deleted here should already have been dead (unreachable), so removing it is not expected to change behavior.


BUG=skia:

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2940
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I94dbe766b2d65bfec6e544d260f71d721f0f5cb0
Reviewed-on: https://skia-review.googlesource.com/2940
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Reed <reed@google.com>
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index ccc728e..282b234 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -6,69 +6,31 @@
  */
 
 #include "Test.h"
+#include "SkHalf.h"
 #include "SkRasterPipeline.h"
 
-static void SK_VECTORCALL load(SkRasterPipeline::Stage* st, size_t x, size_t tail,
-                               Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
-                               Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const float*>() + x;
-    switch(tail&3) {
-        case 0: a = Sk4f{ptr[3]};
-        case 3: b = Sk4f{ptr[2]};
-        case 2: g = Sk4f{ptr[1]};
-        case 1: r = Sk4f{ptr[0]};
-    }
-    st->next(x,tail, r,g,b,a, dr,dg,db,da);
-}
-
-static void SK_VECTORCALL square(SkRasterPipeline::Stage* st, size_t x, size_t tail,
-                                 Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
-                                 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    r *= r;
-    g *= g;
-    b *= b;
-    a *= a;
-    st->next(x,tail, r,g,b,a, dr,dg,db,da);
-}
-
-static void SK_VECTORCALL store(SkRasterPipeline::Stage* st, size_t x, size_t tail,
-                                Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
-                                Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<float*>() + x;
-    switch (tail&3) {
-        case 0: ptr[3] = a[0];
-        case 3: ptr[2] = b[0];
-        case 2: ptr[1] = g[0];
-        case 1: ptr[0] = r[0];
-    }
-}
-
 DEF_TEST(SkRasterPipeline, r) {
-    // We'll build up and run a simple pipeline that exercises the salient
-    // mechanics of SkRasterPipeline:
-    //    - context pointers                           (load,store)
-    //    - stages sensitive to the number of pixels   (load,store)
-    //    - stages insensitive to the number of pixels (square)
-    //    - stages that chain to the next stage        (load,square)
-    //    - stages that terminate the pipeline         (store)
-    //
-    // This pipeline loads up some values, squares them, then writes them back to memory.
+    // Build and run a simple pipeline to exercise SkRasterPipeline,
+    // drawing 50% transparent blue over opaque red in half-floats.
 
-    const float src_vals[] = { 1,2,3,4,5 };
-    float       dst_vals[] = { 0,0,0,0,0 };
+    Sk4h red  = SkFloatToHalf_finite_ftz({ 1.0f, 0.0f, 0.0f, 1.0f }),
+         blue = SkFloatToHalf_finite_ftz({ 0.0f, 0.0f, 0.5f, 0.5f }),
+         result;
 
     SkRasterPipeline p;
-    p.append(load, src_vals);
-    p.append(square);
-    p.append(store, dst_vals);
+    p.append(SkRasterPipeline::load_s_f16, &blue);
+    p.append(SkRasterPipeline::load_d_f16, &red);
+    p.append(SkRasterPipeline::srcover);
+    p.append(SkRasterPipeline::store_f16, &result);
+    p.run(1);
 
-    p.run(5);
+    Sk4f f = SkHalfToFloat_finite_ftz(result);
 
-    REPORTER_ASSERT(r, dst_vals[0] ==  1);
-    REPORTER_ASSERT(r, dst_vals[1] ==  4);
-    REPORTER_ASSERT(r, dst_vals[2] ==  9);
-    REPORTER_ASSERT(r, dst_vals[3] == 16);
-    REPORTER_ASSERT(r, dst_vals[4] == 25);
+    // We should see half-intensity magenta.
+    REPORTER_ASSERT(r, f[0] == 0.5f);
+    REPORTER_ASSERT(r, f[1] == 0.0f);
+    REPORTER_ASSERT(r, f[2] == 0.5f);
+    REPORTER_ASSERT(r, f[3] == 1.0f);
 }
 
 DEF_TEST(SkRasterPipeline_empty, r) {
@@ -79,8 +41,8 @@
 
 DEF_TEST(SkRasterPipeline_nonsense, r) {
     // No asserts... just a test that this is safe to run and terminates.
-    // square() always calls st->next(); this makes sure we've always got something there to call.
+    // srcover() calls st->next(); this makes sure we've always got something there to call.
     SkRasterPipeline p;
-    p.append(square);
+    p.append(SkRasterPipeline::srcover);
     p.run(20);
 }