Set up some hooks for premul/swizzle opts.

  You can call these as SkOpts::premul_xxxa, SkOpts::swaprb_xxxa, etc.

  For now, I just backed the function pointers with some (untested) portable
  code, which may autovectorize.  We can override with optimized versions in
  Init_ssse3() (in SkOpts_ssse3.cpp), Init_neon() (SkOpts_neon.cpp), etc.

BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1569013002

Review URL: https://codereview.chromium.org/1569013002
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 781977e..ee88b23 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -49,6 +49,58 @@
     #include <cpu-features.h>
 #endif
 
+namespace sk_default {
+
+// The variable names in these functions just pretend the input is BGRA.
+// They work fine with both RGBA and BGRA.
+
+// Scales the low three bytes of each pixel by its alpha (the top byte), rounding to nearest.
+static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+    for (int n = 0; n < count; n++) {
+        const uint32_t px = src[n];
+        const uint8_t alpha = px >> 24;
+        // Each uint8_t cast isolates a single channel before it is scaled.
+        const uint8_t hi  = ((uint8_t)(px >> 16) * alpha + 127) / 255,
+                      mid = ((uint8_t)(px >>  8) * alpha + 127) / 255,
+                      lo  = ((uint8_t)(px >>  0) * alpha + 127) / 255;
+        dst[n] = (uint32_t)alpha << 24
+               | (uint32_t)hi    << 16
+               | (uint32_t)mid   <<  8
+               | (uint32_t)lo    <<  0;
+    }
+}
+
+// Exchanges byte 0 and byte 2 of every pixel; bytes 1 and 3 pass through untouched.
+// That makes it its own inverse, so it serves both BGRA -> RGBA and RGBA -> BGRA.
+static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+    for (int n = 0; n < count; n++) {
+        const uint32_t px = src[n];
+        const uint32_t keep = px & 0xFF00FF00u;     // bytes 3 and 1 stay where they are
+        const uint32_t low  = px & 0x000000FFu;     // byte 0, headed for byte 2
+        const uint32_t high = (px >> 16) & 0xFFu;   // byte 2, headed for byte 0
+
+        dst[n] = keep | (low << 16) | high;
+    }
+}
+
+// Premultiplies the low three bytes by alpha (top byte) and also exchanges bytes 0 and 2.
+static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+    for (int n = 0; n < count; n++) {
+        const uint32_t px = src[n];
+        const uint8_t alpha = px >> 24;
+        // Round-to-nearest scaling; each uint8_t cast isolates a single channel.
+        const uint8_t hi  = ((uint8_t)(px >> 16) * alpha + 127) / 255,
+                      mid = ((uint8_t)(px >>  8) * alpha + 127) / 255,
+                      lo  = ((uint8_t)(px >>  0) * alpha + 127) / 255;
+        dst[n] = (uint32_t)alpha << 24
+               | (uint32_t)lo    << 16   // former byte 0 lands in byte 2
+               | (uint32_t)mid   <<  8
+               | (uint32_t)hi    <<  0;  // former byte 2 lands in byte 0
+    }
+}
+
+}  // namespace sk_default
+
 namespace SkOpts {
     // Define default function pointer values here...
     // If our global compile options are set high enough, these defaults might even be
@@ -80,6 +132,10 @@
     decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate;
     decltype(matrix_affine)          matrix_affine          = sk_default::matrix_affine;
 
+    decltype(       premul_xxxa)        premul_xxxa = sk_default::       premul_xxxa;  // portable
+    decltype(       swaprb_xxxa)        swaprb_xxxa = sk_default::       swaprb_xxxa;  // portable
+    decltype(premul_swaprb_xxxa) premul_swaprb_xxxa = sk_default::premul_swaprb_xxxa;  // portable
+
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
     void Init_ssse3();
     void Init_sse41();
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 1b94126..85e38fe 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -57,6 +57,11 @@
                                           const SkColor*);
 
     extern SkMatrix::MapPtsProc matrix_translate, matrix_scale_translate, matrix_affine;
+
+    typedef void (*Swizzle_8888_8888)(uint32_t[], const uint32_t[], int);  // (dst, src, count)
+    extern Swizzle_8888_8888 premul_xxxa,  // BGRA -> bgrA or RGBA -> rgbA
+                             swaprb_xxxa,  // BGRA -> RGBA or RGBA -> BGRA
+                      premul_swaprb_xxxa;  // BGRA -> rgbA or RGBA -> bgrA
 }
 
 #endif//SkOpts_DEFINED