skeleton for float <-> half optimized procs

Nothing fancy yet, just calls the serial code in a loop.

I will try to follow this up with at least some of:
   - SSE2 version of serial code
   - NEON version of serial code
   - NEON version using vcvt.f32.f16/vcvt.f16.f32
   - F16C (between AVX and AVX2) version using vcvtph2ps/vcvtps2ph
The last two are fastest but need runtime detection.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1686543003

Review URL: https://codereview.chromium.org/1686543003
diff --git a/tests/Float16Test.cpp b/tests/Float16Test.cpp
index 0a2c3d5..f437268 100644
--- a/tests/Float16Test.cpp
+++ b/tests/Float16Test.cpp
@@ -8,6 +8,7 @@
 #include "Test.h"
 #include "SkColor.h"
 #include "SkHalf.h"
+#include "SkOpts.h"
 #include "SkPixmap.h"
 
 static bool eq_within_half_float(float a, float b) {
@@ -50,3 +51,16 @@
         }
     }
 }
+
+DEF_TEST(float_to_half, reporter) {
+    const float    fs[] = {    1.0,    2.0,    3.0,    4.0,    5.0,    6.0,    7.0 };
+    const uint16_t hs[] = { 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700 };
+    // hs[i] is the expected 16-bit result for fs[i]; check float -> half output byte-for-byte.
+    uint16_t hscratch[7];
+    SkOpts::float_to_half(hscratch, fs, 7);
+    REPORTER_ASSERT(reporter, 0 == memcmp(hscratch, hs, sizeof(hs)));
+    // Then check the reverse direction: hs converted back must reproduce fs byte-for-byte.
+    float fscratch[7];
+    SkOpts::half_to_float(fscratch, hs, 7);
+    REPORTER_ASSERT(reporter, 0 == memcmp(fscratch, fs, sizeof(fs)));
+}