Port SkXfermode opts to SkOpts.h

Renames Sk4pxXfermode.h to SkXfermode_opts.h,
and refactors it a tiny bit internally.

This moves the selection of optimized xfermode code from "compile-time
everywhere but NEON" to simply "runtime everywhere". I don't anticipate any
effect on perf or correctness.

BUG=skia:4117

Review URL: https://codereview.chromium.org/1264543006
diff --git a/include/private/SkOpts.h b/include/private/SkOpts.h
index 0594588..9239f8e 100644
--- a/include/private/SkOpts.h
+++ b/include/private/SkOpts.h
@@ -9,6 +9,9 @@
 #define SkOpts_DEFINED
 
 #include "SkTypes.h"
+#include "SkXfermode.h"
+
+struct ProcCoeff;
 
 namespace SkOpts {
     // Call to replace pointers to portable functions with pointers to CPU-specific functions.
@@ -24,6 +27,9 @@
     // See SkUtils.h
     extern void (*memset16)(uint16_t[], uint16_t, int);
     extern void (*memset32)(uint32_t[], uint32_t, int);
+
+    // May return nullptr if we haven't specialized the given Mode.
+    extern SkXfermode* (*create_xfermode)(const ProcCoeff&, SkXfermode::Mode);
 }
 
 #endif//SkOpts_DEFINED