SkXbyak basics

A little JIT proof of concept for SkRasterPipeline, using xbyak, a header-only assembler.  xbyak is x86-only, but it supports x86 very thoroughly, and it's very user-friendly (at least as far as assembler libraries go...).

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: Ie17e562b0f3fff5914041badfb2c1fe4f86efab8
Reviewed-on: https://skia-review.googlesource.com/5730
Reviewed-by: Herb Derby <herb@google.com>
Reviewed-by: Heather Miller <hcm@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 2c9335c..82751cf 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -23,6 +23,7 @@
   skia_use_lua = false
   skia_use_mesa = false
   skia_use_piex = !is_win
+  skia_use_xbyak = false
   skia_use_zlib = true
 
   skia_android_serial = ""
@@ -506,6 +507,22 @@
   ]
 }
 
+optional("xbyak") {
+  enabled = skia_use_xbyak  # skia_use_xbyak is set to false earlier in this file.
+  public_defines = [ "SK_XBYAK" ]  # SkRasterPipeline::compile() checks this macro before trying the JIT.
+
+  deps = [
+    "//third_party/xbyak",
+  ]
+
+  # xbyak uses exceptions, so the no_exceptions config is removed for this target; SkXbyak.cpp catches them all.
+  configs_to_remove = [ "//gn:no_exceptions" ]
+
+  sources = [
+    "src/opts/SkXbyak.cpp",  # Defines SkRasterPipeline::jit().
+  ]
+}
+
 optional("xml") {
   enabled = skia_use_expat
   public_defines = [ "SK_XML" ]
@@ -546,6 +563,7 @@
     ":ssse3",
     ":typeface_freetype",
     ":webp",
+    ":xbyak",
     ":xml",
   ]
 
diff --git a/DEPS b/DEPS
index 60c7ce9..0eb3193 100644
--- a/DEPS
+++ b/DEPS
@@ -42,6 +42,9 @@
 
   # microhttpd for skiaserve
   "third_party/externals/microhttpd" : "https://android.googlesource.com/platform/external/libmicrohttpd@748945ec6f1c67b7efc934ab0808e1d32f2fb98d",
+
+  # TODO: mirror to skia.googlesource.com, and pin to a specific revision (url@sha) like the microhttpd entry above.
+  "third_party/externals/xbyak" : "https://github.com/herumi/xbyak",
 }
 
 recursedeps = [ "common" ]
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index 7b3d49d..8d0840f 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -27,6 +27,12 @@
 }
 
 std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
+#ifdef SK_XBYAK  // Comes from public_defines of the optional xbyak target in BUILD.gn.
+    if (auto fn = this->jit()) {  // jit() yields an empty function when it can't handle this pipeline.
+        SkDebugf("Jitted with xbyak!\n");
+        return fn;
+    }
+#endif  // SK_XBYAK -- without a JIT result, fall through to the SkOpts-compiled pipeline.
     return SkOpts::compile_pipeline(fStages.data(), SkToInt(fStages.size()));
 }
 
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index e6c9915..b73f626 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -126,6 +126,8 @@
     void append_from_srgb(SkAlphaType);
 
 private:
+    std::function<void(size_t, size_t, size_t)> jit() const;  // Defined in src/opts/SkXbyak.cpp; empty result if the pipeline can't be JITted.
+
     std::vector<Stage> fStages;
 };
 
diff --git a/src/opts/SkXbyak.cpp b/src/opts/SkXbyak.cpp
new file mode 100644
index 0000000..12f5200
--- /dev/null
+++ b/src/opts/SkXbyak.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkCpu.h"
+#include "SkRasterPipeline.h"
+#include <memory>
+
+#if defined(__clang__)
+    #pragma clang diagnostic ignored "-Wduplicate-enum"
+#endif
+#define XBYAK_NO_OP_NAMES   // xor(), not(), etc. -> xor_(), not_(), etc.
+#include "xbyak/xbyak.h"
+
+namespace {
+
+    struct Pipeline : public Xbyak::CodeGenerator {  // JIT code generator for a list of SkRasterPipeline stages.
+
+        static std::shared_ptr<Pipeline> Create(const SkRasterPipeline::Stage* stages, int n) {  // nullptr unless the CPU and every stage are supported.
+            if (!SkCpu::Supports(SkCpu::HSW)) {
+                // The code emitted below uses ymm registers and is only generated for HSW-capable CPUs.  TODO: other targets?
+                return nullptr;
+            }
+
+            bool supported = true;  // The constructor clears this on the first stage it cannot emit.
+            auto pipeline = std::make_shared<Pipeline>(stages, n, &supported);
+            if (supported) {
+                return pipeline;
+            }
+            return nullptr;  // The partially generated pipeline is dropped here.
+        }
+
+        Pipeline(const SkRasterPipeline::Stage* stages, int n, bool* supported) {  // Emits the whole function; on an unknown stage sets *supported = false and stops.
+            // Set up some register name aliases.
+            //auto x = rdi, y = rsi, tail = rdx;  // Not used yet; presumably where the (x, y, tail) arguments arrive under the System V x86-64 ABI.
+            auto r = ymm0,  g = ymm1,  b = ymm2,  a = ymm3,
+                dr = ymm4, dg = ymm5, db = ymm6, da = ymm7;  // Presumably source (r,g,b,a) and destination (dr,dg,db,da) channels -- confirm.
+
+            Xbyak::Label floatOneStorage;  // Bound after ret() below, where the 1.0f constant is emitted.
+            vbroadcastss(ymm8, ptr[rip + floatOneStorage]);  // ymm8 = 1.0f in every lane, loaded rip-relative from that constant.
+
+            // TODO: set up (x+0.5,y+0.5) in (r,g).  For now each of the eight registers is xor'ed with itself, i.e. zeroed.
+            vxorps(r,r);
+            vxorps(g,g);
+            vxorps(b,b);
+            vxorps(a,a);
+            vxorps(dr,dr);
+            vxorps(dg,dg);
+            vxorps(db,db);
+            vxorps(da,da);
+
+            for (int i = 0; i < n; i++) {
+                switch(stages[i].stage) {
+
+                    default:  // No stage has a case yet, so any stage at all makes the pipeline unsupported.
+                        *supported = false;
+                        return;
+                }
+            }
+
+            ret();
+            L(floatOneStorage); df(1.0f);  // Constant data placed after ret(), so execution never reaches it.
+        }
+
+        void df(float f) {  // Float counterpart of dd(): emits f's raw 32-bit representation.
+            union { float f; uint32_t x; } pun = {f};  // Reinterpret the float's bits as a uint32_t.
+            dd(pun.x);
+        }
+    };
+
+}  // namespace
+
+std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {  // Returns fn(x, y, n) backed by JIT code, or nullptr if this pipeline can't be JITted.
+    try {  // xbyak uses exceptions; the catch(...) below keeps any of them from escaping.
+        if (auto pipeline = Pipeline::Create(fStages.data(), SkToInt(fStages.size()))) {
+            return [pipeline] (size_t x, size_t y, size_t n) {  // The captured shared_ptr keeps the Pipeline alive as long as this callable.
+                auto call = pipeline->getCode<void(*)(size_t, size_t, size_t)>();
+                while (n >= 8) {  // Full strides: 8 at a time, with 0 passed as the third (tail) argument.
+                    call(x,y,0);
+                    x += 8;
+                    n -= 8;
+                }
+                if (n) {  // Leftover 1..7: one final call with the remaining count as the tail.
+                    call(x,y,n);
+                }
+            };
+        }
+        SkDebugf("Cannot yet JIT with xbyak:\n");
+        this->dump();  // Called right after the message above so the rejected pipeline is reported too.
+        return nullptr;
+    } catch(...) {  // Any failure simply means "no JIT"; compile() then uses the SkOpts path.
+        return nullptr;
+    }
+}
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index ddd84e7..2f9b406 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -34,6 +34,16 @@
     REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
     REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
     REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
+
+    // Run again, this time compiling the pipeline.
+    result = 0;
+
+    auto fn = p.compile();
+    fn(0,0, 1);
+    REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
+    REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
+    REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
+    REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
 }
 
 DEF_TEST(SkRasterPipeline_empty, r) {
diff --git a/third_party/xbyak/BUILD.gn b/third_party/xbyak/BUILD.gn
new file mode 100644
index 0000000..7595c32
--- /dev/null
+++ b/third_party/xbyak/BUILD.gn
@@ -0,0 +1,13 @@
+# Copyright 2016 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+declare_args() {
+}
+
+import("../third_party.gni")
+
+third_party("xbyak") {
+  public_include_dirs = [ "../externals/xbyak/" ]  # Checkout location from DEPS; lets SkXbyak.cpp #include "xbyak/xbyak.h".
+}