move skvm interpreter to SkOpts again

Moving the interpreter into SkOpts lets us use platform
intrinsics, which is the easiest way to guarantee that
Op::fma_f32 actually fuses.

While implementing this, we noticed that quad-pumping
was actually about 25% slower than double-pumping,
with single-pumping falling between the two.  Switch
from quad-pumping to double-pumping.

Change-Id: Ib93fd175fb8f6aaf49f769a95edfa9fd6b2674f6
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/275299
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Herb Derby <herb@google.com>
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index dd1cc9d..5b8e773 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -13,6 +13,7 @@
 #include "src/core/SkXfermodePriv.h"
 
 struct SkBitmapProcState;
+namespace skvm { struct InterpreterInstruction; }
 
 namespace SkOpts {
     // Call to replace pointers to portable functions with pointers to CPU-specific functions.
@@ -77,6 +78,9 @@
     extern void (*start_pipeline_lowp )(size_t,size_t,size_t,size_t, void**);
 #undef M
 
+    extern void (*interpret_skvm)(const skvm::InterpreterInstruction insts[], int ninsts,
+                                  int nregs, int loop, const int strides[], int nargs,
+                                  int n, void* args[]);
 }
 
 #endif//SkOpts_DEFINED