Add clamp to sk_linear_to_srgb, reorder instructions
Improves performance for the toSRGB and to2Dot2 xforms. It
appears faster to defer clamping until the end. That way we
don't stall the mul pipeline with a min/max.
toSRGB: 371us -> 346us
to2Dot2: 404us -> 387us
FWIW, it probably makes sense to clamp inside
sk_linear_to_srgb anyway. If not, we should consider
providing two versions (one that clamps and one that
doesn't).
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2173803002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review-Url: https://codereview.chromium.org/2173803002
diff --git a/src/core/SkSRGB.h b/src/core/SkSRGB.h
index d3baa74..08ba860 100644
--- a/src/core/SkSRGB.h
+++ b/src/core/SkSRGB.h
@@ -22,6 +22,12 @@
extern const float sk_linear_from_srgb[256];
+static inline Sk4f sk_clamp_0_255(const Sk4f& x) {
+ // The order of the arguments is important here. We want to make sure that NaN
+ // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
+ return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f);
+}
+
static inline Sk4i sk_linear_to_srgb(const Sk4f& x) {
// Approximation of the sRGB gamma curve (within 1 when scaled to 8-bit pixels).
//
@@ -40,7 +46,7 @@
+ (+0.687999f * 255.0f) * sqrt
+ (+0.412999f * 255.0f) * ftrt;
- return SkNx_cast<int>( (x < 0.0048f).thenElse(lo, hi) );
+ return SkNx_cast<int>(sk_clamp_0_255((x < 0.0048f).thenElse(lo, hi)));
}
#endif//SkSRGB_DEFINED
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index 0ab9000..b4eb9a2 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -16,12 +16,6 @@
namespace SK_OPTS_NS {
-static Sk4f clamp_0_1(const Sk4f& x) {
- // The order of the arguments is important here. We want to make sure that NaN
- // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
- return Sk4f::Min(Sk4f::Max(x, 0.0f), 1.0f);
-}
-
static Sk4i linear_to_2dot2(const Sk4f& x) {
// x^(29/64) is a very good approximation of the true value, x^(1/2.2).
auto x2 = x.rsqrt(), // x^(-1/2)
@@ -29,7 +23,7 @@
x64 = x32.rsqrt(); // x^(+1/64)
// 29 = 32 - 2 - 1
- return Sk4f_round(255.0f * x2.invert() * x32 * x64.invert());
+ return Sk4f_round(sk_clamp_0_255(255.0f * x2.invert() * x32 * x64.invert()));
}
enum DstGamma {
@@ -82,10 +76,9 @@
Sk4i (*linear_to_curve)(const Sk4f&) =
(kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2;
- auto reds = linear_to_curve(clamp_0_1(dstReds));
- auto greens = linear_to_curve(clamp_0_1(dstGreens));
- auto blues = linear_to_curve(clamp_0_1(dstBlues));
-
+ auto reds = linear_to_curve(dstReds);
+ auto greens = linear_to_curve(dstGreens);
+ auto blues = linear_to_curve(dstBlues);
auto rgba = (reds << SK_R32_SHIFT)
| (greens << SK_G32_SHIFT)
@@ -155,7 +148,7 @@
Sk4i (*linear_to_curve)(const Sk4f&) =
(kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2;
- auto pixel = linear_to_curve(clamp_0_1(dstPixel));
+ auto pixel = linear_to_curve(dstPixel);
uint32_t rgba;
SkNx_cast<uint8_t>(pixel).store(&rgba);