SoftLight with SkPMFloat
SSE speeds up about 4.5x over existing integer SSE,
NEON speeds up about 3x over serial integer code.
We expect 1-2 bit component diffs in the usual GMs.
Still guarded by SK_SUPPORT_LEGACY_XFERMODES,
which I'll now try to lift in Chrome.
BUG=skia:
Review URL: https://codereview.chromium.org/1221493002
diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h
index b587183..28fd9f1 100644
--- a/src/core/Sk4pxXfermode.h
+++ b/src/core/Sk4pxXfermode.h
@@ -141,6 +141,31 @@
otherwise));
return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
}
+XFERMODE(SoftLight) {  // Soft-light blend; s and d come from the XFERMODE macro (defined outside this hunk) - presumably premultiplied src and dst, TODO confirm.
+ auto sa = s.alphas(),  // presumably src alpha replicated across all components - confirm against alphas()
+ da = d.alphas(),  // same for dst
+ isa = Sk4f(1)-sa,  // 1 - sa
+ ida = Sk4f(1)-da;  // 1 - da
+
+ // Some common terms shared by the three candidate results below.
+ auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)),  // d/da where da > 0, otherwise 0, so the quotient is never used when da is not positive
+ s2 = Sk4f(2)*s,  // 2*s
+ m4 = Sk4f(4)*m;  // 4*m
+
+ // The logic forks three ways; all three candidates are computed and then selected with thenElse():
+ // 1. dark src?              (s2 < sa)
+ // 2. light src, dark dst?   (not 1, and 4*d < da)
+ // 3. light src, light dst?  (neither of the above)
+ auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1.
+ darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. Expands to 16m^3 - 12m^2 + 3m.
+ liteDst = m.sqrt() - m, // Used in case 3.
+ liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d < da).thenElse(darkDst, liteDst); // Case 2 or 3? Picks darkDst where 4*d < da, else liteDst.
+
+ auto alpha = s + d*isa;  // only the component kept by the SkPMFloat(1,0,0,0) mask below is used
+ auto colors = s*ida + d*isa + (s2 < sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? Only the components kept by SkPMFloat(0,1,1,1) are used.
+
+ return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);  // mask-and-add: first component from alpha, remaining three from colors
+}
#undef XFERMODE
// A reasonable fallback mode for doing AA is to simply apply the transfermode first,
@@ -244,6 +269,7 @@
case SkXfermode::kColorDodge_Mode: return SkTPMFloatXfermode<ColorDodge>::Create(rec);
case SkXfermode::kColorBurn_Mode: return SkTPMFloatXfermode<ColorBurn>::Create(rec);
+ case SkXfermode::kSoftLight_Mode: return SkTPMFloatXfermode<SoftLight>::Create(rec);
#endif
default: break;
}
diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp
index ca26263..2024a17 100644
--- a/src/opts/SkXfermode_opts_SSE2.cpp
+++ b/src/opts/SkXfermode_opts_SSE2.cpp
@@ -516,15 +516,13 @@
SkXfermode::Mode mode) {
SkXfermodeProcSIMD proc = nullptr;
switch (mode) {
- // TODO(mtklein): Sk4pxXfermode has these now. Clean up.
+ // TODO(mtklein): Sk4pxXfermode has these now. Clean up the whole file!
case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break;
case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break;
case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break;
case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break;
case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break;
case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break;
-
- // TODO(mtklein): implement this with SkPMFloat.
case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break;
default: break;
}