Start moving SkRasterPipeline stages to SkOpts.

This lets them pick up runtime CPU specializations. Here I've plugged in SSE4.1. This is still one of the N prelude CLs to full 8-at-a-time AVX.

I've moved the union of the stages used by SkRasterPipelineBench and SkRasterPipelineBlitter to SkOpts... they'll all be used by the blitter eventually. Picking up SSE4.1 specialization here (even still just 4 pixels at a time) is a significant speedup, especially to store_srgb(), so much that it's no longer really interesting to compare against the fused-but-default-instruction-set version in the bench. So that's gone now.

That left the SkRasterPipeline unit test as the only other user of the EasyFn simplified interface to SkRasterPipeline. So I converted that back down to the bare-metal interface, and EasyFn and its friends became SkRasterPipeline_opts.h exclusive abbreviations (now called Kernel_Sk4f). This isn't really unexpected: SkXfermode also wanted to build up its own little abstractions, and once you build your own abstraction, the value of an additional EasyFn-like layer plummets to negative.

For simplicity I've left the SkXfermode stages alone, except srcover() which was always part of the blitter. No particular reason except keeping the churn down while I hack. These _can_ be in SkOpts, but don't have to be until we go 8-at-a-time.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2752
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I3b476b18232a1598d8977e425be2150059ab71dc
Reviewed-on: https://skia-review.googlesource.com/2752
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>

commit: baaf8ad95237d1defdb7d93077d9bf8410d8ad7f [log] [tgz]
author: Mike Klein <mtklein@chromium.org> Thu Sep 29 09:04:15 2016 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> Thu Sep 29 16:20:26 2016 +0000
tree: 4635750d0b6c5b792a6d0e226e7fe8042a394fa9
parent: 8e6791fb9a79c38228339ffc5c75d73b58893fa8 [diff] [blame]
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index aa3e355..17a93da 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp

@@ -6,8 +6,8 @@
  */
 
 #include "Benchmark.h"
+#include "SkOpts.h"
 #include "SkRasterPipeline.h"
-#include "SkSRGB.h"
 
 static const int N = 1023;
 
@@ -22,186 +22,21 @@
 //   - src = srcover(dst, src)
 //   - store src back as srgb
 
-SK_RASTER_STAGE(load_s_srgb) {
-    auto ptr = (const uint32_t*)ctx + x;
-
-    if (tail) {
-        float rs[] = {0,0,0,0},
-              gs[] = {0,0,0,0},
-              bs[] = {0,0,0,0},
-              as[] = {0,0,0,0};
-        for (size_t i = 0; i < (tail&3); i++) {
-            rs[i] = sk_linear_from_srgb[(ptr[i] >>  0) & 0xff];
-            gs[i] = sk_linear_from_srgb[(ptr[i] >>  8) & 0xff];
-            bs[i] = sk_linear_from_srgb[(ptr[i] >> 16) & 0xff];
-            as[i] = (ptr[i] >> 24) * (1/255.0f);
-        }
-        r = Sk4f::Load(rs);
-        g = Sk4f::Load(gs);
-        b = Sk4f::Load(bs);
-        a = Sk4f::Load(as);
-        return;
-    }
-
-    r = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >>  0) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >>  0) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >>  0) & 0xff] };
-
-    g = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  8) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >>  8) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >>  8) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >>  8) & 0xff] };
-
-    b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff],
-              sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
-              sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
-              sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
-    a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-}
-
-SK_RASTER_STAGE(load_d_srgb) {
-    auto ptr = (const uint32_t*)ctx + x;
-
-    if (tail) {
-        float rs[] = {0,0,0,0},
-              gs[] = {0,0,0,0},
-              bs[] = {0,0,0,0},
-              as[] = {0,0,0,0};
-        for (size_t i = 0; i < (tail&3); i++) {
-            rs[i] = sk_linear_from_srgb[(ptr[i] >>  0) & 0xff];
-            gs[i] = sk_linear_from_srgb[(ptr[i] >>  8) & 0xff];
-            bs[i] = sk_linear_from_srgb[(ptr[i] >> 16) & 0xff];
-            as[i] = (ptr[i] >> 24) * (1/255.0f);
-        }
-        dr = Sk4f::Load(rs);
-        dg = Sk4f::Load(gs);
-        db = Sk4f::Load(bs);
-        da = Sk4f::Load(as);
-        return;
-    }
-
-    dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >>  0) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >>  0) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >>  0) & 0xff] };
-
-    dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  8) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >>  8) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >>  8) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >>  8) & 0xff] };
-
-    db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff],
-               sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
-               sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
-               sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
-
-    da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-}
-
-SK_RASTER_STAGE(scale_u8) {
-    auto ptr = (const uint8_t*)ctx + x;
-
-    Sk4b cov;
-
-    if (tail) {
-        uint8_t cs[] = {0,0,0,0};
-        switch (tail&3) {
-            case 3: cs[2] = ptr[2];
-            case 2: cs[1] = ptr[1];
-            case 1: cs[0] = ptr[0];
-        }
-        cov = Sk4b::Load(cs);
-    } else {
-        cov = Sk4b::Load(ptr);
-    }
-
-    auto c = SkNx_cast<float>(cov) * (1/255.0f);
-    r *= c;
-    g *= c;
-    b *= c;
-    a *= c;
-}
-
-SK_RASTER_STAGE(srcover) {
-    auto A = 1.0f - a;
-    r += dr * A;
-    g += dg * A;
-    b += db * A;
-    a += da * A;
-}
-
-SK_RASTER_STAGE(store_srgb) {
-    auto ptr = (uint32_t*)ctx + x;
-
-    uint32_t* dst = nullptr;
-    uint32_t stack[4];
-
-    if (tail) {
-        dst = ptr;
-        ptr = stack;
-    }
-
-    ( sk_linear_to_srgb(r)
-    | sk_linear_to_srgb(g) << 8
-    | sk_linear_to_srgb(b) << 16
-    | Sk4f_round(255.0f*a) << 24).store(ptr);
-
-    switch (tail&3) {
-        case 3: dst[2] = ptr[2];
-        case 2: dst[1] = ptr[1];
-        case 1: dst[0] = ptr[0];
-    }
-}
-
 class SkRasterPipelineBench : public Benchmark {
 public:
-    SkRasterPipelineBench(bool fused) : fFused(fused) {}
-
     bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
-    const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fused"
-                                                     : "SkRasterPipelineBench_pipeline"; }
+    const char* onGetName() override { return "SkRasterPipeline"; }
 
     void onDraw(int loops, SkCanvas*) override {
         while (loops --> 0) {
-            fFused ? this->runFused() : this->runPipeline();
+            SkRasterPipeline p;
+            p.append(SkOpts::load_s_srgb_body, SkOpts::load_s_srgb_tail, src);
+            p.append(SkOpts::scale_u8_body,    SkOpts::scale_u8_tail,    mask);
+            p.append(SkOpts::load_d_srgb_body, SkOpts::load_d_srgb_tail, dst);
+            p.append(SkOpts::srcover);
+            p.append(SkOpts::store_srgb_body,  SkOpts::store_srgb_tail,  dst);
+            p.run(N);
         }
     }
-
-    void runFused() {
-        Sk4f r,g,b,a, dr,dg,db,da;
-        size_t x = 0, n = N;
-        while (n >= 4) {
-            load_s_srgb(src    , x,0, r,g,b,a, dr,dg,db,da);
-            scale_u8   (mask   , x,0, r,g,b,a, dr,dg,da,da);
-            load_d_srgb(dst    , x,0, r,g,b,a, dr,dg,da,da);
-            srcover    (nullptr, x,0, r,g,b,a, dr,dg,da,da);
-            store_srgb (dst    , x,0, r,g,b,a, dr,dg,da,da);
-
-            x += 4;
-            n -= 4;
-        }
-        if (n > 0) {
-            load_s_srgb(src    , x,n, r,g,b,a, dr,dg,db,da);
-            scale_u8   (mask   , x,n, r,g,b,a, dr,dg,da,da);
-            load_d_srgb(dst    , x,n, r,g,b,a, dr,dg,da,da);
-            srcover    (nullptr, x,n, r,g,b,a, dr,dg,da,da);
-            store_srgb (dst    , x,n, r,g,b,a, dr,dg,da,da);
-        }
-    }
-
-    void runPipeline() {
-        SkRasterPipeline p;
-        p.append<load_s_srgb>(src);
-        p.append<   scale_u8>(mask);
-        p.append<load_d_srgb>(dst);
-        p.append<    srcover>();
-        p.last  < store_srgb>(dst);
-        p.run(N);
-    }
-
-    bool fFused;
 };
-
-DEF_BENCH( return new SkRasterPipelineBench(true); )
-DEF_BENCH( return new SkRasterPipelineBench(false); )
+DEF_BENCH( return new SkRasterPipelineBench; )
commit	baaf8ad95237d1defdb7d93077d9bf8410d8ad7f	[log] [tgz]
author	Mike Klein <mtklein@chromium.org>	Thu Sep 29 09:04:15 2016 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	Thu Sep 29 16:20:26 2016 +0000
tree	4635750d0b6c5b792a6d0e226e7fe8042a394fa9
parent	8e6791fb9a79c38228339ffc5c75d73b58893fa8 [diff] [blame]