Add F16 support to SkPNGImageEncoder

BUG=skia:

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: Ifd221365a7b9f9a4a4fc5382621e0da7189e1148
Reviewed-on: https://skia-review.googlesource.com/6526
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Leon Scroggins <scroggo@google.com>
Commit-Queue: Matt Sarett <msarett@google.com>
diff --git a/gm/encode-srgb.cpp b/gm/encode-srgb.cpp
index b9fe65b..d4bd782 100644
--- a/gm/encode-srgb.cpp
+++ b/gm/encode-srgb.cpp
@@ -10,6 +10,7 @@
 #include "Resources.h"
 #include "SkCanvas.h"
 #include "SkCodec.h"
+#include "SkColorSpace_Base.h"
 #include "SkData.h"
 #include "SkImageEncoderPriv.h"
 #include "SkPM4f.h"
@@ -24,6 +25,18 @@
     return (a + b - 1) / b;
 }
 
+sk_sp<SkColorSpace> fix_for_colortype(sk_sp<SkColorSpace> colorSpace, SkColorType colorType) {
+    if (kRGBA_F16_SkColorType == colorType) {  // F16 always gets a linear-gamma color space
+        if (!colorSpace) {
+            return SkColorSpace::MakeNamed(SkColorSpace::kSRGBLinear_Named);  // none supplied
+        }
+
+        return as_CSB(colorSpace)->makeLinearGamma();
+    }
+
+    return colorSpace;  // every other color type keeps colorSpace as passed in
+}
+
 static void make_index8(SkBitmap* bitmap, SkAlphaType alphaType, sk_sp<SkColorSpace> colorSpace) {
     const SkColor colors[] = {
             0x800000FF, 0x8000FF00, 0x80FF0000, 0x80FFFF00,
@@ -73,11 +86,13 @@
         return;
     }
 
-    sk_sp<SkData> data = GetResourceAsData("color_wheel.png");
+    const char* resource = (kOpaque_SkAlphaType == alphaType) ? "color_wheel.jpg"
+                                                              : "color_wheel.png";
+    sk_sp<SkData> data = GetResourceAsData(resource);
     std::unique_ptr<SkCodec> codec(SkCodec::NewFromData(data));
     SkImageInfo dstInfo = codec->getInfo().makeColorType(colorType)
                                           .makeAlphaType(alphaType)
-                                          .makeColorSpace(colorSpace);
+                                          .makeColorSpace(fix_for_colortype(colorSpace, colorType));
     bitmap->allocPixels(dstInfo);
     codec->getPixels(dstInfo, bitmap->getPixels(), bitmap->rowBytes());
 }
@@ -107,12 +122,16 @@
     }
 
     SkISize onISize() override {
-        return SkISize::Make(imageWidth * 2, imageHeight * 4);
+        return SkISize::Make(imageWidth * 2, imageHeight * 9);
     }
 
     void onDraw(SkCanvas* canvas) override {
-        const SkColorType colorTypes[] = { kN32_SkColorType, kIndex_8_SkColorType, };
-        const SkAlphaType alphaTypes[] = { kUnpremul_SkAlphaType, kPremul_SkAlphaType, };
+        const SkColorType colorTypes[] = {
+                kN32_SkColorType, kRGBA_F16_SkColorType, kIndex_8_SkColorType,
+        };
+        const SkAlphaType alphaTypes[] = {
+                kUnpremul_SkAlphaType, kPremul_SkAlphaType, kOpaque_SkAlphaType,
+        };
         const sk_sp<SkColorSpace> colorSpaces[] = {
                 nullptr, SkColorSpace::MakeNamed(SkColorSpace::kSRGB_Named),
         };
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 3d1bb88..ac1e3b9 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -69,7 +69,7 @@
     M(load_565)  M(store_565)                                    \
     M(load_f16)  M(store_f16)                                    \
     M(load_8888) M(store_8888)                                   \
-    M(load_u16_be) M(load_rgb_u16_be)                            \
+    M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be)            \
     M(load_tables_u16_be) M(load_tables_rgb_u16_be)              \
     M(load_tables) M(store_tables)                               \
     M(scale_u8) M(scale_1_float)                                 \
diff --git a/src/images/SkPNGImageEncoder.cpp b/src/images/SkPNGImageEncoder.cpp
index 55aead2..48aca8b 100644
--- a/src/images/SkPNGImageEncoder.cpp
+++ b/src/images/SkPNGImageEncoder.cpp
@@ -83,6 +83,17 @@
         case kIndex_8_SkColorType:
         case kGray_8_SkColorType:
             return transform_scanline_memcpy;
+        case kRGBA_F16_SkColorType:
+            switch (info.alphaType()) {
+                case kOpaque_SkAlphaType:
+                case kUnpremul_SkAlphaType:
+                    return transform_scanline_F16;
+                case kPremul_SkAlphaType:
+                    return transform_scanline_F16_premul;
+                default:
+                    SkASSERT(false);
+                    return nullptr;
+            }
         default:
             SkASSERT(false);
             return nullptr;
@@ -168,19 +179,8 @@
     if (!pixmap.addr() || pixmap.info().isEmpty()) {
         return false;
     }
-    const SkColorType colorType = pixmap.colorType();
-    switch (colorType) {
-        case kIndex_8_SkColorType:
-        case kGray_8_SkColorType:
-        case kRGBA_8888_SkColorType:
-        case kBGRA_8888_SkColorType:
-        case kARGB_4444_SkColorType:
-        case kRGB_565_SkColorType:
-            break;
-        default:
-            return false;
-    }
 
+    const SkColorType colorType = pixmap.colorType();
     const SkAlphaType alphaType = pixmap.alphaType();
     switch (alphaType) {
         case kUnpremul_SkAlphaType:
@@ -197,12 +197,23 @@
     }
 
     const bool isOpaque = (kOpaque_SkAlphaType == alphaType);
-    const int bitDepth = 8;
+    int bitDepth = 8;
     png_color_8 sig_bit;
     sk_bzero(&sig_bit, sizeof(png_color_8));
-
     int pngColorType;
     switch (colorType) {
+        case kRGBA_F16_SkColorType:
+            if (!pixmap.colorSpace() || !pixmap.colorSpace()->gammaIsLinear()) {
+                return false;
+            }
+
+            sig_bit.red = 16;
+            sig_bit.green = 16;
+            sig_bit.blue = 16;
+            sig_bit.alpha = 16;
+            bitDepth = 16;
+            pngColorType = isOpaque ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGB_ALPHA;
+            break;
         case kIndex_8_SkColorType:
             sig_bit.red = 8;
             sig_bit.green = 8;
@@ -240,6 +251,7 @@
         default:
             return false;
     }
+
     if (kIndex_8_SkColorType == colorType) {
         SkColorTable* ctable = pixmap.ctable();
         if (!ctable || ctable->count() == 0) {
@@ -250,9 +262,25 @@
         // When ctable->count() <= 16, we could potentially use 1, 2,
         // or 4 bit indices.
     }
+
     return do_encode(stream, pixmap, pngColorType, bitDepth, sig_bit);
 }
 
+static int num_components(int pngColorType) {  // channels per pixel for a libpng color type
+    switch (pngColorType) {
+        case PNG_COLOR_TYPE_PALETTE:
+        case PNG_COLOR_TYPE_GRAY:
+            return 1;
+        case PNG_COLOR_TYPE_RGB:
+            return 3;
+        case PNG_COLOR_TYPE_RGBA:
+            return 4;
+        default:
+            SkASSERT(false);  // color types not listed above are unhandled
+            return 0;
+    }
+}
+
 static bool do_encode(SkWStream* stream, const SkPixmap& pixmap,
                       int pngColorType, int bitDepth, png_color_8& sig_bit) {
     png_structp png_ptr;
@@ -308,12 +336,18 @@
 
     png_set_sBIT(png_ptr, info_ptr, &sig_bit);
     png_write_info(png_ptr, info_ptr);
+    int pngBytesPerPixel = num_components(pngColorType) * (bitDepth / 8);
+    if (kRGBA_F16_SkColorType == pixmap.colorType() && kOpaque_SkAlphaType == pixmap.alphaType()) {
+        // For kOpaque, kRGBA_F16, we will keep the row as RGBA and tell libpng
+        // to skip the alpha channel.
+        png_set_filler(png_ptr, 0, PNG_FILLER_AFTER);
+        pngBytesPerPixel = 8;
+    }
 
-    const char* srcImage = (const char*)pixmap.addr();
-    SkAutoSTMalloc<1024, char> rowStorage(pixmap.width() << 2);
+    SkAutoSTMalloc<1024, char> rowStorage(pixmap.width() * pngBytesPerPixel);
     char* storage = rowStorage.get();
+    const char* srcImage = (const char*)pixmap.addr();
     transform_scanline_proc proc = choose_proc(pixmap.info());
-
     for (int y = 0; y < pixmap.height(); y++) {
         png_bytep row_ptr = (png_bytep)storage;
         proc(storage, srcImage, pixmap.width(), SkColorTypeBytesPerPixel(pixmap.colorType()));
diff --git a/src/images/transform_scanline.h b/src/images/transform_scanline.h
index 1ce67cd..1c26348 100644
--- a/src/images/transform_scanline.h
+++ b/src/images/transform_scanline.h
@@ -96,7 +96,7 @@
 
 template <bool kIsRGBA>
 static inline void transform_scanline_unpremultiply(char* SK_RESTRICT dst,
-                                                    const char* SK_RESTRICT src, int width, int) {
+                                                    const char* SK_RESTRICT src, int width) {
     const uint32_t* srcP = (const SkPMColor*)src;
     const SkUnPreMultiply::Scale* table = SkUnPreMultiply::GetScaleTable();
 
@@ -132,21 +132,20 @@
  * Transform from legacy kPremul, kRGBA_8888_SkColorType to 4-bytes-per-pixel unpremultiplied RGBA.
  */
 static void transform_scanline_rgbA(char* SK_RESTRICT dst, const char* SK_RESTRICT src, int width,
-                                    int bpp) {
-    transform_scanline_unpremultiply<true>(dst, src, width, bpp);
+                                    int) {
+    transform_scanline_unpremultiply<true>(dst, src, width);
 }
 
 /**
  * Transform from legacy kPremul, kBGRA_8888_SkColorType to 4-bytes-per-pixel unpremultiplied RGBA.
  */
 static void transform_scanline_bgrA(char* SK_RESTRICT dst, const char* SK_RESTRICT src, int width,
-                                    int bpp) {
-    transform_scanline_unpremultiply<false>(dst, src, width, bpp);
+                                    int) {
+    transform_scanline_unpremultiply<false>(dst, src, width);
 }
 
 template <bool kIsRGBA>
-static inline void transform_scanline_unpremultiply_sRGB(void* dst, const void* src, int width, int)
-{
+static inline void transform_scanline_unpremultiply_sRGB(void* dst, const void* src, int width) {
     SkRasterPipeline p;
     p.append(SkRasterPipeline::load_8888, &src);
     if (!kIsRGBA) {
@@ -164,16 +163,16 @@
  * Transform from kPremul, kRGBA_8888_SkColorType to 4-bytes-per-pixel unpremultiplied RGBA.
  */
 static void transform_scanline_srgbA(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
-                                     int width, int bpp) {
-    transform_scanline_unpremultiply_sRGB<true>(dst, src, width, bpp);
+                                     int width, int) {
+    transform_scanline_unpremultiply_sRGB<true>(dst, src, width);
 }
 
 /**
  * Transform from kPremul, kBGRA_8888_SkColorType to 4-bytes-per-pixel unpremultiplied RGBA.
  */
 static void transform_scanline_sbgrA(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
-                                     int width, int bpp) {
-    transform_scanline_unpremultiply_sRGB<false>(dst, src, width, bpp);
+                                     int width, int) {
+    transform_scanline_unpremultiply_sRGB<false>(dst, src, width);
 }
 
 /**
@@ -219,3 +218,28 @@
         *dst++ = a;
     }
 }
+
+/**
+ * Transform from kRGBA_F16 to 8-bytes-per-pixel RGBA (16 bits per channel, big endian).
+ */
+static void transform_scanline_F16(char* SK_RESTRICT dst, const char* SK_RESTRICT src, int width,
+                                   int) {
+    SkRasterPipeline p;
+    p.append(SkRasterPipeline::load_f16, (const void**) &src);
+    p.append(SkRasterPipeline::to_srgb);
+    p.append(SkRasterPipeline::store_u16_be, (void**) &dst);
+    p.run(0, 0, width);
+}
+
+/**
+ * Transform from kPremul, kRGBA_F16 to 8-bytes-per-pixel unpremultiplied RGBA.
+ */
+static void transform_scanline_F16_premul(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
+                                          int width, int) {
+    SkRasterPipeline p;
+    p.append(SkRasterPipeline::load_f16, (const void**) &src);
+    p.append(SkRasterPipeline::unpremul);
+    p.append(SkRasterPipeline::to_srgb);
+    p.append(SkRasterPipeline::store_u16_be, (void**) &dst);
+    p.run(0, 0, width);
+}
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 0dfb43a..fe74a77 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -701,6 +701,11 @@
                 hi = _mm256_extractf128_si256(src.fVec, 1);
         return _mm_packus_epi32(lo, hi);
     }
+
+    template<> AI /*static*/ Sk8h SkNx_cast<uint16_t>(const Sk8f& src) {
+        return SkNx_cast<uint16_t>(SkNx_cast<int>(src));  // float -> int -> uint16_t
+    }
+
     template<> AI /*static*/ Sk8b SkNx_cast<uint8_t>(const Sk8i& src) {
         auto _16 = SkNx_cast<uint16_t>(src);
         return _mm_packus_epi16(_16.fVec, _16.fVec);
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index f496bd5..00b6ac7 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -608,6 +608,23 @@
     a = 1.0f;
 }
 
+STAGE_CTX(store_u16_be, uint64_t**) {  // one uint64_t per pixel: four 16-bit channels
+    auto to_u16_be = [](const SkNf& x) {
+        SkNh x16 = SkNx_cast<uint16_t>(65535.0f * x);  // scale by 65535, then cast to 16 bits
+        return (x16 << 8) | (x16 >> 8);  // swap the two bytes of every 16-bit value
+    };
+
+    auto ptr = *ctx + x;
+    SkNx<N, uint64_t> px;
+    SkNh::Store4(tail ? (void*)&px : (void*)ptr, to_u16_be(r),
+                                                 to_u16_be(g),
+                                                 to_u16_be(b),
+                                                 to_u16_be(a));
+    if (tail) {  // Store4 wrote into the temporary px rather than directly into ptr
+        store(tail, px, ptr);  // NOTE(review): presumably writes only `tail` pixels -- confirm
+    }
+}
+
 STAGE_CTX(load_tables, const LoadTablesContext*) {
     auto ptr = (const uint32_t*)ctx->fSrc + x;