Implement the accum_565 and accum_f16 stages

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=5125
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I2e338ae14db0068d9a09e16a0678dd2ee9f97efd
Reviewed-on: https://skia-review.googlesource.com/5125
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
diff --git a/src/image/SkImageShader.cpp b/src/image/SkImageShader.cpp
index 0a5e7c1..3ce52b0 100644
--- a/src/image/SkImageShader.cpp
+++ b/src/image/SkImageShader.cpp
@@ -290,8 +290,8 @@
     switch (info.colorType()) {
         case kRGBA_8888_SkColorType:
         case kBGRA_8888_SkColorType:
-//      case   kRGB_565_SkColorType:
-//      case  kRGBA_F16_SkColorType:
+        case   kRGB_565_SkColorType:
+        case  kRGBA_F16_SkColorType:
             break;
         default: return false;
     }
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 3d6ed8a..f17d7ac 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -765,17 +765,67 @@
     b = fx * fy;
 };
 
-STAGE(accum_565, true) {}  // TODO
-STAGE(accum_f16, true) {}  // TODO
-
-STAGE(accum_8888, true) {
+template <typename T>  // T: element type of the caller's pixel pointer (deduced from ptr)
+SI SkNi offset_and_ptr(T** ptr, const void* ctx, const SkNf& x, const SkNf& y) {
     auto sc = (const SkImageShaderContext*)ctx;
 
-    SkNi ix = SkNx_cast<int>(r),
-         iy = SkNx_cast<int>(g);
+    SkNi ix = SkNx_cast<int>(x),  // float sample coordinates -> integer pixel coordinates
+         iy = SkNx_cast<int>(y);
     SkNi offset = iy*sc->stride + ix;
 
-    auto p = (const uint32_t*)sc->pixels;
+    *ptr = (const T*)sc->pixels;  // out-param: base pixel pointer, typed for the caller's format
+    return offset;  // iy*stride + ix per lane; callers use it as an element index into *ptr
+}
+
+STAGE(accum_565, true) {  // adds scale-weighted samples of 16-bit 565 pixels into dr,dg,db,da
+    const uint16_t* p;
+    SkNi offset = offset_and_ptr(&p, ctx, r, g);  // r,g are passed as the x,y sample coordinates
+
+    uint16_t px[N];
+    for (size_t i = 0; i < N; i++) {
+        if (kIsTail && i >= tail) {  // tail pass: lanes at or beyond tail get 0, no memory read
+            px[i] = 0;
+            continue;
+        }
+        px[i] = p[offset[i]];  // gather one 16-bit pixel per lane
+    }
+    SkNf R,G,B;
+    from_565(SkNh::Load(px), &R, &G, &B);  // unpack the loaded 565 values into R,G,B
+
+    SkNf scale = b;  // b supplies the per-lane weight for this sample
+    dr += scale * R;
+    dg += scale * G;
+    db += scale * B;
+    da += scale;  // no alpha is unpacked from 565; the weight is added as if alpha were 1
+}
+
+STAGE(accum_f16, true) {  // adds scale-weighted samples of 64-bit (4 x 16-bit) pixels into dr,dg,db,da
+    const uint64_t* p;
+    SkNi offset = offset_and_ptr(&p, ctx, r, g);  // r,g are passed as the x,y sample coordinates
+
+    uint16_t R[N], G[N], B[N], A[N];  // raw 16-bit channel values, converted to float below
+    for (size_t i = 0; i < N; i++) {
+        if (kIsTail && i >= tail) {  // tail pass: lanes at or beyond tail get 0, no memory read
+            R[i] = G[i] = B[i] = A[i] = 0;
+            continue;
+        }
+        uint64_t rgba = p[offset[i]];  // gather one 64-bit pixel per lane
+        R[i] = rgba >>  0;  // bits  0-15 (assignment to uint16_t keeps the low 16 bits)
+        G[i] = rgba >> 16;  // bits 16-31
+        B[i] = rgba >> 32;  // bits 32-47
+        A[i] = rgba >> 48;  // bits 48-63
+    }
+    SkNf scale = b;  // b supplies the per-lane weight for this sample
+    dr += scale * SkHalfToFloat_finite_ftz(SkNh::Load(R));  // NOTE(review): name suggests finite-only, flush-to-zero half->float; confirm
+    dg += scale * SkHalfToFloat_finite_ftz(SkNh::Load(G));
+    db += scale * SkHalfToFloat_finite_ftz(SkNh::Load(B));
+    da += scale * SkHalfToFloat_finite_ftz(SkNh::Load(A));
+}
+
+STAGE(accum_8888, true) {
+    const uint32_t* p;
+    SkNi offset = offset_and_ptr(&p, ctx, r, g);
+
     uint8_t R[N], G[N], B[N], A[N];
     for (size_t i = 0; i < N; i++) {
         if (kIsTail && i >= tail) {
@@ -797,13 +847,9 @@
 }
 
 STAGE(accum_srgb, true) {
-    auto sc = (const SkImageShaderContext*)ctx;
+    const uint32_t* p;
+    SkNi offset = offset_and_ptr(&p, ctx, r, g);
 
-    SkNi ix = SkNx_cast<int>(r),
-         iy = SkNx_cast<int>(g);
-    SkNi offset = iy*sc->stride + ix;
-
-    auto p = (const uint32_t*)sc->pixels;
     uint8_t R[N], G[N], B[N], A[N];
     for (size_t i = 0; i < N; i++) {
         if (kIsTail && i >= tail) {