Support sRGB destinations in the optimized color xform code

201295.jpg on HP z620 (300x280)

QCMS Xform                    0.418 ms
Skia NEW Xform                0.378 ms

Speedup vs QCMS               1.11x

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2078623002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2078623002
diff --git a/bench/ColorCodecBench.cpp b/bench/ColorCodecBench.cpp
index 621fc41..9dde5da 100644
--- a/bench/ColorCodecBench.cpp
+++ b/bench/ColorCodecBench.cpp
@@ -12,9 +12,10 @@
 #include "SkCommandLineFlags.h"
 
 #if defined(SK_TEST_QCMS)
-DEFINE_bool(qcms, false, "Bench qcms color conversion");
+DEFINE_bool(qcms,       false, "Bench qcms color conversion");
 #endif
 DEFINE_bool(xform_only, false, "Only time the color xform, do not include the decode time");
+DEFINE_bool(srgb,       false, "Convert to srgb dst space");
 
 ColorCodecBench::ColorCodecBench(const char* name, sk_sp<SkData> encoded)
     : fEncoded(std::move(encoded))
@@ -170,7 +171,9 @@
 
 #if defined(SK_TEST_QCMS)
     if (FLAGS_qcms) {
-        fDstSpaceQCMS.reset(qcms_profile_from_memory(dstData->data(), dstData->size()));
+        fDstSpaceQCMS.reset(FLAGS_srgb ?
+                qcms_profile_sRGB() :
+                qcms_profile_from_memory(dstData->data(), dstData->size()));
         SkASSERT(fDstSpaceQCMS);
 
         // This call takes a non-trivial amount of time, but I think it's the most fair to
@@ -179,7 +182,8 @@
     } else
 #endif
     {
-        fDstSpace = SkColorSpace::NewICC(dstData->data(), dstData->size());
+        fDstSpace = FLAGS_srgb ? SkColorSpace::NewNamed(SkColorSpace::kSRGB_Named) :
+                                 SkColorSpace::NewICC(dstData->data(), dstData->size());
         SkASSERT(fDstSpace);
     }
 }
diff --git a/dm/DM.cpp b/dm/DM.cpp
index 6904408..f6dc398 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -747,6 +747,9 @@
         src = new ColorCodecSrc(colorImage, ColorCodecSrc::kDst_HPZR30w_Mode);
         push_src("image", "color_codec_HPZR30w", src);
 
+        src = new ColorCodecSrc(colorImage, ColorCodecSrc::kDst_sRGB_Mode);
+        push_src("image", "color_codec_sRGB", src);
+
 #if defined(SK_TEST_QCMS)
         src = new ColorCodecSrc(colorImage, ColorCodecSrc::kQCMS_HPZR30w_Mode);
         push_src("image", "color_codec_QCMS_HPZR30w", src);
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index 97f1e48..8afbfb9 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -886,9 +886,12 @@
         case kBaseline_Mode:
             canvas->drawBitmap(bitmap, 0, 0);
             break;
+        case kDst_sRGB_Mode:
         case kDst_HPZR30w_Mode: {
             sk_sp<SkColorSpace> srcSpace = sk_ref_sp(codec->getColorSpace());
-            sk_sp<SkColorSpace> dstSpace = SkColorSpace::NewICC(dstData->data(), dstData->size());
+            sk_sp<SkColorSpace> dstSpace = (kDst_sRGB_Mode == fMode) ?
+                    SkColorSpace::NewNamed(SkColorSpace::kSRGB_Named) :
+                    SkColorSpace::NewICC(dstData->data(), dstData->size());
             SkASSERT(dstSpace);
 
             std::unique_ptr<SkColorSpaceXform> xform = SkColorSpaceXform::New(srcSpace, dstSpace);
diff --git a/dm/DMSrcSink.h b/dm/DMSrcSink.h
index a2bfbca..be7d68e 100644
--- a/dm/DMSrcSink.h
+++ b/dm/DMSrcSink.h
@@ -215,10 +215,10 @@
         // monitor, you're in luck!  The unmarked outputs of this test should display
         // correctly on this monitor in the Chrome browser.  If not, it's useful to know
         // that this monitor has a profile that is fairly similar to Adobe RGB.
-        // TODO (msarett): Should we add a new test with a new monitor and verify that outputs
-        //                 look identical on two different dsts?
         kDst_HPZR30w_Mode,
 
+        kDst_sRGB_Mode,
+
 #if defined(SK_TEST_QCMS)
         // Use QCMS for color correction.
         kQCMS_HPZR30w_Mode,
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index 4c67e8d..f42811a 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -37,15 +37,35 @@
         return nullptr;
     }
 
-    if (SkColorSpace::k2Dot2Curve_GammaNamed == dstSpace->gammaNamed() &&
-        0.0f == srcToDst.getFloat(3, 0) &&
+    if (0.0f == srcToDst.getFloat(3, 0) &&
         0.0f == srcToDst.getFloat(3, 1) &&
         0.0f == srcToDst.getFloat(3, 2))
     {
-        if (SkColorSpace::kSRGB_GammaNamed == srcSpace->gammaNamed()) {
-            return std::unique_ptr<SkColorSpaceXform>(new SkSRGBTo2Dot2Xform(srcToDst));
-        } else if (SkColorSpace::k2Dot2Curve_GammaNamed == srcSpace->gammaNamed()) {
-            return std::unique_ptr<SkColorSpaceXform>(new Sk2Dot2To2Dot2Xform(srcToDst));
+        switch (srcSpace->gammaNamed()) {
+            case SkColorSpace::kSRGB_GammaNamed:
+                if (SkColorSpace::kSRGB_GammaNamed == dstSpace->gammaNamed()) {
+                    return std::unique_ptr<SkColorSpaceXform>(
+                            new SkFastXform<SkColorSpace::kSRGB_GammaNamed,
+                                            SkColorSpace::kSRGB_GammaNamed>(srcToDst));
+                } else if (SkColorSpace::k2Dot2Curve_GammaNamed == dstSpace->gammaNamed()) {
+                    return std::unique_ptr<SkColorSpaceXform>(
+                            new SkFastXform<SkColorSpace::kSRGB_GammaNamed,
+                                            SkColorSpace::k2Dot2Curve_GammaNamed>(srcToDst));
+                }
+                break;
+            case SkColorSpace::k2Dot2Curve_GammaNamed:
+                if (SkColorSpace::kSRGB_GammaNamed == dstSpace->gammaNamed()) {
+                    return std::unique_ptr<SkColorSpaceXform>(
+                            new SkFastXform<SkColorSpace::k2Dot2Curve_GammaNamed,
+                                            SkColorSpace::kSRGB_GammaNamed>(srcToDst));
+                } else if (SkColorSpace::k2Dot2Curve_GammaNamed == dstSpace->gammaNamed()) {
+                    return std::unique_ptr<SkColorSpaceXform>(
+                            new SkFastXform<SkColorSpace::k2Dot2Curve_GammaNamed,
+                                            SkColorSpace::k2Dot2Curve_GammaNamed>(srcToDst));
+                }
+                break;
+            default:
+                break;
         }
     }
 
@@ -90,23 +110,37 @@
 #endif
 }
 
-SkSRGBTo2Dot2Xform::SkSRGBTo2Dot2Xform(const SkMatrix44& srcToDst)
+template <SkColorSpace::GammaNamed Src, SkColorSpace::GammaNamed Dst>
+SkFastXform<Src, Dst>::SkFastXform(const SkMatrix44& srcToDst)
 {
     build_src_to_dst(fSrcToDst, srcToDst);
 }
 
-void SkSRGBTo2Dot2Xform::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const {
+template <>
+void SkFastXform<SkColorSpace::kSRGB_GammaNamed, SkColorSpace::kSRGB_GammaNamed>
+::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const
+{
+    SkOpts::color_xform_RGB1_srgb_to_srgb(dst, src, len, fSrcToDst);
+}
+
+template <>
+void SkFastXform<SkColorSpace::kSRGB_GammaNamed, SkColorSpace::k2Dot2Curve_GammaNamed>
+::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const
+{
     SkOpts::color_xform_RGB1_srgb_to_2dot2(dst, src, len, fSrcToDst);
 }
 
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-Sk2Dot2To2Dot2Xform::Sk2Dot2To2Dot2Xform(const SkMatrix44& srcToDst)
+template <>
+void SkFastXform<SkColorSpace::k2Dot2Curve_GammaNamed, SkColorSpace::kSRGB_GammaNamed>
+::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const
 {
-    build_src_to_dst(fSrcToDst, srcToDst);
+    SkOpts::color_xform_RGB1_2dot2_to_srgb(dst, src, len, fSrcToDst);
 }
 
-void Sk2Dot2To2Dot2Xform::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const {
+template <>
+void SkFastXform<SkColorSpace::k2Dot2Curve_GammaNamed, SkColorSpace::k2Dot2Curve_GammaNamed>
+::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const
+{
     SkOpts::color_xform_RGB1_2dot2_to_2dot2(dst, src, len, fSrcToDst);
 }
 
diff --git a/src/core/SkColorSpaceXform.h b/src/core/SkColorSpaceXform.h
index 1ea6080..e6abdbb 100644
--- a/src/core/SkColorSpaceXform.h
+++ b/src/core/SkColorSpaceXform.h
@@ -34,26 +34,14 @@
     virtual ~SkColorSpaceXform() {}
 };
 
-class SkSRGBTo2Dot2Xform : public SkColorSpaceXform {
+template <SkColorSpace::GammaNamed Src, SkColorSpace::GammaNamed Dst>
+class SkFastXform : public SkColorSpaceXform {
 public:
 
     void xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const override;
 
 private:
-    SkSRGBTo2Dot2Xform(const SkMatrix44& srcToDst);
-
-    float fSrcToDst[12];
-
-    friend class SkColorSpaceXform;
-};
-
-class Sk2Dot2To2Dot2Xform : public SkColorSpaceXform {
-public:
-
-    void xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const override;
-
-private:
-    Sk2Dot2To2Dot2Xform(const SkMatrix44& srcToDst);
+    SkFastXform(const SkMatrix44& srcToDst);
 
     float fSrcToDst[12];
 
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index b4145ac..a2de1f5 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -81,6 +81,10 @@
             sk_default::color_xform_RGB1_srgb_to_2dot2;
     decltype(color_xform_RGB1_2dot2_to_2dot2) color_xform_RGB1_2dot2_to_2dot2 =
             sk_default::color_xform_RGB1_2dot2_to_2dot2;
+    decltype(color_xform_RGB1_srgb_to_srgb)   color_xform_RGB1_srgb_to_srgb   =
+            sk_default::color_xform_RGB1_srgb_to_srgb;
+    decltype(color_xform_RGB1_2dot2_to_srgb)  color_xform_RGB1_2dot2_to_srgb  =
+            sk_default::color_xform_RGB1_2dot2_to_srgb;
 
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
     void Init_ssse3();
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 1c33529..186fa73 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -74,6 +74,10 @@
                                                    const float srcToDstMatrix[16]);
     extern void (*color_xform_RGB1_2dot2_to_2dot2)(uint32_t* dst, const uint32_t* src, int len,
                                                    const float srcToDstMatrix[16]);
+    extern void (*color_xform_RGB1_srgb_to_srgb) (uint32_t* dst, const uint32_t* src, int len,
+                                                  const float srcToDstMatrix[16]);
+    extern void (*color_xform_RGB1_2dot2_to_srgb)(uint32_t* dst, const uint32_t* src, int len,
+                                                  const float srcToDstMatrix[16]);
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index 5af5c2a..2c14c80 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -157,13 +157,33 @@
     return 255.0f * x2.invert() * x32 * x64.invert();
 }
 
+static Sk4f linear_to_srgb(const Sk4f& x) {
+    // Approximation of the sRGB gamma curve (within 1 when scaled to 8-bit pixels).
+    // For 0.00000f <= x <  0.00349f,    12.92 * x
+    // For 0.00349f <= x <= 1.00000f,    0.679*(x.^0.5) + 0.423*x.^(0.25) - 0.101
+    // Note that 0.00349 was selected because it is a point where both functions produce the
+    // same pixel value when rounded.
+    auto rsqrt = x.rsqrt(),
+         sqrt  = rsqrt.invert(),
+         ftrt  = rsqrt.rsqrt();
+
+    auto hi = (-0.101115084998961f * 255.0f) +
+              (+0.678513029959381f * 255.0f) * sqrt +
+              (+0.422602055039580f * 255.0f) * ftrt;
+
+    auto lo = (12.92f * 255.0f) * x;
+
+    auto mask = (x < 0.00349f);
+    return mask.thenElse(lo, hi);
+}
+
 static Sk4f clamp_0_to_255(const Sk4f& x) {
     // The order of the arguments is important here.  We want to make sure that NaN
     // clamps to zero.  Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
     return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f);
 }
 
-template <const float (&linear_from_curve)[256]>
+template <const float (&linear_from_curve)[256], Sk4f (*linear_to_curve)(const Sk4f&)>
 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
                              const float matrix[16]) {
     // Load transformation matrix.
@@ -192,9 +212,9 @@
              dstBlues  = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues;
 
         // Convert to dst gamma.
-        dstReds   = linear_to_2dot2(dstReds);
-        dstGreens = linear_to_2dot2(dstGreens);
-        dstBlues  = linear_to_2dot2(dstBlues);
+        dstReds   = linear_to_curve(dstReds);
+        dstGreens = linear_to_curve(dstGreens);
+        dstBlues  = linear_to_curve(dstBlues);
 
         // Clamp floats to byte range.
         dstReds   = clamp_0_to_255(dstReds);
@@ -223,7 +243,7 @@
         auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
 
         // Convert to dst gamma.
-        dstPixel = linear_to_2dot2(dstPixel);
+        dstPixel = linear_to_curve(dstPixel);
 
         // Clamp floats to byte range.
         dstPixel = clamp_0_to_255(dstPixel);
@@ -242,12 +262,22 @@
 
 static void color_xform_RGB1_srgb_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
                                            const float matrix[16]) {
-    color_xform_RGB1<linear_from_srgb>(dst, src, len, matrix);
+    color_xform_RGB1<linear_from_srgb, linear_to_2dot2>(dst, src, len, matrix);
 }
 
 static void color_xform_RGB1_2dot2_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
                                            const float matrix[16]) {
-    color_xform_RGB1<linear_from_2dot2>(dst, src, len, matrix);
+    color_xform_RGB1<linear_from_2dot2, linear_to_2dot2>(dst, src, len, matrix);
+}
+
+static void color_xform_RGB1_srgb_to_srgb(uint32_t* dst, const uint32_t* src, int len,
+                                           const float matrix[16]) {
+    color_xform_RGB1<linear_from_srgb, linear_to_srgb>(dst, src, len, matrix);
+}
+
+static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len,
+                                           const float matrix[16]) {
+    color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix);
 }
 
 }  // namespace SK_OPTS_NS
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index bdc6d77..9751c4d 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -124,8 +124,12 @@
     bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); }
 
     SkNx thenElse(const SkNx& t, const SkNx& e) const {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_blendv_ps(e.fVec, t.fVec, fVec);
+#else
         return _mm_or_ps(_mm_and_ps   (fVec, t.fVec),
                          _mm_andnot_ps(fVec, e.fVec));
+#endif
     }
 
     __m128 fVec;
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index 4161571..4489242 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -23,5 +23,7 @@
 
         color_xform_RGB1_srgb_to_2dot2  = sk_sse41::color_xform_RGB1_srgb_to_2dot2;
         color_xform_RGB1_2dot2_to_2dot2 = sk_sse41::color_xform_RGB1_2dot2_to_2dot2;
+        color_xform_RGB1_srgb_to_srgb   = sk_sse41::color_xform_RGB1_srgb_to_srgb;
+        color_xform_RGB1_2dot2_to_srgb  = sk_sse41::color_xform_RGB1_2dot2_to_srgb;
     }
 }