Correct sRGB <-> linear everywhere.

This trims the SkPM4fPriv methods down to just foolproof methods.
(Anything trying to build these itself is probably wrong.)

Things like Sk4f srgb_to_linear(Sk4f) can't really exist anymore,
at least not efficiently, so this refactor is somewhat more invasive
than you might think.  Generally this means anything using to_4f() is
also making a misstep, so to_4f() is gone too.

It also no longer makes sense to try to play games with linear floats
with a 255 bias.  That hack can't work with real sRGB coding.

Rather than update them, I've removed a couple of L32 xfermode fast
paths.  I'd even rather drop it entirely...

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2163683002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2163683002
diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp
index c290714..184e933 100644
--- a/bench/SkBlend_optsBench.cpp
+++ b/bench/SkBlend_optsBench.cpp
@@ -19,13 +19,39 @@
 
 #define INNER_LOOPS 10
 
+static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+    auto d = Sk4f_fromS32(*dst),
+         s = Sk4f_fromS32( src);
+    *dst = Sk4f_toS32(s + d * (1.0f - s[3]));
+}
+
+static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+    if (src >= 0xFF000000) {
+        *dst = src;
+        return;
+    }
+    brute_srcover_srgb_srgb_1(dst, src);
+}
+
 static void brute_force_srcover_srgb_srgb(
     uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
     while (ndst > 0) {
         int n = SkTMin(ndst, nsrc);
 
         for (int i = 0; i < n; i++) {
-            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
+            brute_srcover_srgb_srgb_1(dst++, src[i]);
+        }
+        ndst -= n;
+    }
+}
+
+static void trivial_srcover_srgb_srgb(
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+    while (ndst > 0) {
+        int n = SkTMin(ndst, nsrc);
+
+        for (int i = 0; i < n; i++) {
+            srcover_srgb_srgb_1(dst++, src[i]);
         }
         ndst -= n;
     }
@@ -36,8 +62,8 @@
     uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
 
     auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
-        srcover_srgb8888_srgb_1(dst++, *src++);
-        srcover_srgb8888_srgb_1(dst, *src);
+        srcover_srgb_srgb_1(dst++, *src++);
+        srcover_srgb_srgb_1(dst, *src);
     };
 
     while (ndst >0) {
@@ -62,24 +88,12 @@
         } while (dsrc < end);
 
         if ((count & 1) != 0) {
-            srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
-                                    *reinterpret_cast<const uint32_t*>(dsrc));
+            srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
+                                *reinterpret_cast<const uint32_t*>(dsrc));
         }
     }
 }
 
-static void trivial_srcover_srgb_srgb(
-    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
-    while (ndst > 0) {
-        int n = SkTMin(ndst, nsrc);
-
-        for (int i = 0; i < n; i++) {
-            srcover_srgb8888_srgb_1(dst++, src[i]);
-        }
-        ndst -= n;
-    }
-}
-
 class SrcOverVSkOptsBruteForce {
 public:
     static SkString Name() { return SkString{"VSkOptsBruteForce"}; }