/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkRasterPipeline_opts_DEFINED
#define SkRasterPipeline_opts_DEFINED

#include "SkColorPriv.h"
#include "SkColorLookUpTable.h"
#include "SkColorSpaceXform_A2B.h"
#include "SkColorSpaceXformPriv.h"
#include "SkHalf.h"
#include "SkPM4f.h"
#include "SkPM4fPriv.h"
#include "SkRasterPipeline.h"
#include "SkSRGB.h"
#include "SkUtils.h"
#include <utility>

namespace {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    static constexpr int N = 8;
#else
    static constexpr int N = 4;
#endif

    using SkNf = SkNx<N, float>;
    using SkNi = SkNx<N, int>;
    using SkNh = SkNx<N, uint16_t>;
    using SkNb = SkNx<N, uint8_t>;

    struct BodyStage;
    struct TailStage;

    using Body = void(SK_VECTORCALL *)(BodyStage*, size_t,         SkNf,SkNf,SkNf,SkNf,
                                                                   SkNf,SkNf,SkNf,SkNf);
    using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
                                                                   SkNf,SkNf,SkNf,SkNf);
    struct BodyStage { Body next; void* ctx; };
    struct TailStage { Tail next; void* ctx; };

}  // namespace

#define SI static inline

// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
SI void SK_VECTORCALL next(BodyStage* st, size_t x,
                           SkNf  r, SkNf  g, SkNf  b, SkNf  a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}
SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
                           SkNf  r, SkNf  g, SkNf  b, SkNf  a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
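// Note how the array is packed: each BodyStage/TailStage holds the *next*
// stage's function pointer alongside the *current* stage's context.  A stage
// body reads st->ctx for its own data, then next(st, ...) tail-calls
// st->next(st+1, ...), so control walks down the array until a stage that
// doesn't call next() (just_return, the stores) ends the chain.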


#define STAGE(name, kCallNext)                                                           \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf&  r, SkNf&  g, SkNf&  b, SkNf&  a,   \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da);  \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                                  \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                       \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da);                        \
        if (kCallNext) {                                                                 \
            next(st, x, r,g,b,a, dr,dg,db,da);                                           \
        }                                                                                \
    }                                                                                    \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,                     \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,                       \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da);                      \
        if (kCallNext) {                                                                 \
            next(st, x,tail, r,g,b,a, dr,dg,db,da);                                      \
        }                                                                                \
    }                                                                                    \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf&  r, SkNf&  g, SkNf&  b, SkNf&  a,   \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)

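// A usage sketch (force_opaque is a hypothetical stage, not one defined in
// this file): STAGE stamps out both the Body and Tail variants of a stage
// from one kernel body, with ctx, x, tail, kIsTail, and the eight registers
// all in scope inside the braces:
//
//     STAGE(force_opaque, true) {
//         a = 1.0f;   // Runs for both the N-pixel body and the partial tail.
//     }
//
// Passing true for kCallNext forwards to the next stage; stages like
// just_return and the stores pass false to end the chain.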
// Many xfermodes apply the same logic to each channel.
#define RGBA_XFERMODE(name)                                                       \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,    \
                                               const SkNf& d, const SkNf& da);   \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                          \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,               \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {             \
        r = name##_kernel(r,a,dr,da);                                            \
        g = name##_kernel(g,a,dg,da);                                            \
        b = name##_kernel(b,a,db,da);                                            \
        a = name##_kernel(a,a,da,da);                                            \
        next(st, x, r,g,b,a, dr,dg,db,da);                                       \
    }                                                                            \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,             \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,               \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {             \
        r = name##_kernel(r,a,dr,da);                                            \
        g = name##_kernel(g,a,dg,da);                                            \
        b = name##_kernel(b,a,db,da);                                            \
        a = name##_kernel(a,a,da,da);                                            \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                  \
    }                                                                            \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,    \
                                               const SkNf& d, const SkNf& da)

// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
#define RGB_XFERMODE(name)                                                        \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,    \
                                               const SkNf& d, const SkNf& da);   \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                          \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,               \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {             \
        r = name##_kernel(r,a,dr,da);                                            \
        g = name##_kernel(g,a,dg,da);                                            \
        b = name##_kernel(b,a,db,da);                                            \
        a = a + (da * (1.0f-a));                                                 \
        next(st, x, r,g,b,a, dr,dg,db,da);                                       \
    }                                                                            \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,             \
                               SkNf  r, SkNf  g, SkNf  b, SkNf  a,               \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {             \
        r = name##_kernel(r,a,dr,da);                                            \
        g = name##_kernel(g,a,dg,da);                                            \
        b = name##_kernel(b,a,db,da);                                            \
        a = a + (da * (1.0f-a));                                                 \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                  \
    }                                                                            \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,    \
                                               const SkNf& d, const SkNf& da)

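// The kernels these macros declare see one channel at a time: s is the source
// channel, sa the source alpha, d and da the destination channel and alpha,
// all premultiplied.  So a one-liner like RGBA_XFERMODE(srcin) { return s * da; }
// (defined below) expands into a full Body+Tail stage pair.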
SI SkNf inv(const SkNf& x) { return 1.0f - x; }

SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
    return SkNx_fma(to-from, cov, from);
}
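// N.B. SkNx_fma(to-from, cov, from) computes from + (to-from)*cov with a fused
// multiply-add where the platform has one, so lerp() blends from cov == 0
// (all `from`) to cov == 1 (all `to`).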

template <bool kIsTail, typename T>
SI SkNx<N,T> load(size_t tail, const T* src) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskload for 32- and 64-bit T
    if (kIsTail) {
        T buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = src[6];
            case 6: buf[5] = src[5];
            case 5: buf[4] = src[4];
            case 4: buf[3] = src[3];
            case 3: buf[2] = src[2];
            case 2: buf[1] = src[1];
        }
        buf[0] = src[0];
        return SkNx<N,T>::Load(buf);
    }
    return SkNx<N,T>::Load(src);
}

template <bool kIsTail, typename T>
SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskstore for 32- and 64-bit T
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: dst[6] = v[6];
            case 6: dst[5] = v[5];
            case 5: dst[4] = v[4];
            case 4: dst[3] = v[3];
            case 3: dst[2] = v[2];
            case 2: dst[1] = v[1];
        }
        dst[0] = v[0];
        return;
    }
    v.store(dst);
}
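// The tail path handles the last partial chunk of a row.  E.g. with N == 4 and
// tail == 3, the switch copies elements 2 and 1 (the case fall-through is
// deliberate), the line after it copies element 0, and lane 3 stays zero on
// load / is left untouched on store.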

SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
    auto _32_bit = SkNx_cast<int>(_565);

    *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
    *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
    *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
}

SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
    return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
                              | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
                              | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
}
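// 565 packs a pixel as rrrrrggggggbbbbb: 5 bits of red, 6 of green, 5 of blue.
// from_565() masks each field in place and rescales it to [0,1]; to_565()
// rounds each float channel back to its field width and shifts it into place.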

STAGE(just_return, false) { }

STAGE(trace, true) {
    SkDebugf("%s\n", (const char*)ctx);
}

STAGE(registers, true) {
    auto print = [](const char* name, const SkNf& v) {
        SkDebugf("%s:", name);
        for (int i = 0; i < N; i++) {
            SkDebugf(" %g", v[i]);
        }
        SkDebugf("\n");
    };
    print(" r",  r);
    print(" g",  g);
    print(" b",  b);
    print(" a",  a);
    print("dr", dr);
    print("dg", dg);
    print("db", db);
    print("da", da);
}

STAGE(clamp_0, true) {
    a = SkNf::Max(a, 0.0f);
    r = SkNf::Max(r, 0.0f);
    g = SkNf::Max(g, 0.0f);
    b = SkNf::Max(b, 0.0f);
}

STAGE(clamp_a, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, a);
    g = SkNf::Min(g, a);
    b = SkNf::Min(b, a);
}

STAGE(clamp_1, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, 1.0f);
    g = SkNf::Min(g, 1.0f);
    b = SkNf::Min(b, 1.0f);
}

STAGE(unpremul, true) {
    r *= a.invert();
    g *= a.invert();
    b *= a.invert();
}

STAGE(premul, true) {
    r *= a;
    g *= a;
    b *= a;
}

STAGE(move_src_dst, true) {
    dr = r;
    dg = g;
    db = b;
    da = a;
}

STAGE(swap_src_dst, true) {
    SkTSwap(r, dr);
    SkTSwap(g, dg);
    SkTSwap(b, db);
    SkTSwap(a, da);
}

// The default shader produces a constant color (from the SkPaint).
STAGE(constant_color, true) {
    auto color = (const SkPM4f*)ctx;
    r = color->r();
    g = color->g();
    b = color->b();
    a = color->a();
}

// s' = sc for a constant c.
STAGE(scale_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r *= c;
    g *= c;
    b *= c;
    a *= c;
}

// s' = d(1-c) + sc, for a constant c.
STAGE(lerp_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}

// s' = sc for 8-bit c.
STAGE(scale_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = r*c;
    g = g*c;
    b = b*c;
    a = a*c;
}

// s' = d(1-c) + sc for 8-bit c.
STAGE(lerp_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}

// s' = d(1-c) + sc for 565 c.
STAGE(lerp_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    SkNf cr, cg, cb;
    from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);

    r = lerp(dr, r, cr);
    g = lerp(dg, g, cg);
    b = lerp(db, b, cb);
    a = 1.0f;
}

STAGE(load_d_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
    da = 1.0f;
}

STAGE(load_s_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
    a = 1.0f;
}

STAGE(store_565, false) {
    auto ptr = *(uint16_t**)ctx + x;
    store<kIsTail>(tail, to_565(r,g,b), ptr);
}

STAGE(load_d_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    dr = SkHalfToFloat_finite_ftz(rh);
    dg = SkHalfToFloat_finite_ftz(gh);
    db = SkHalfToFloat_finite_ftz(bh);
    da = SkHalfToFloat_finite_ftz(ah);
}

STAGE(load_s_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    r = SkHalfToFloat_finite_ftz(rh);
    g = SkHalfToFloat_finite_ftz(gh);
    b = SkHalfToFloat_finite_ftz(bh);
    a = SkHalfToFloat_finite_ftz(ah);
}

STAGE(store_f16, false) {
    auto ptr = *(uint64_t**)ctx + x;

    uint64_t buf[8];
    SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
                                      SkFloatToHalf_finite_ftz(g),
                                      SkFloatToHalf_finite_ftz(b),
                                      SkFloatToHalf_finite_ftz(a));
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}
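// Each f16 pixel is one uint64_t holding four interleaved 16-bit halves
// (r,g,b,a).  SkNh::Load4()/Store4() convert between that interleaved memory
// layout and the four planar SkNh vectors the pipeline works with.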

STAGE(store_f32, false) {
    auto ptr = *(SkPM4f**)ctx + x;

    SkPM4f buf[8];
    SkNf::Store4(kIsTail ? buf : ptr, r,g,b,a);
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}


// Load 8-bit SkPMColor-order sRGB.
STAGE(load_d_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

STAGE(load_s_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

STAGE(store_srgb, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, (  sk_linear_to_srgb(r) << SK_R32_SHIFT
                          | sk_linear_to_srgb(g) << SK_G32_SHIFT
                          | sk_linear_to_srgb(b) << SK_B32_SHIFT
                          | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}

STAGE(load_s_8888, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = (1/255.0f)*SkNx_cast<float>(to_int((px >>  0) & 0xff));
    g = (1/255.0f)*SkNx_cast<float>(to_int((px >>  8) & 0xff));
    b = (1/255.0f)*SkNx_cast<float>(to_int((px >> 16) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> 24        ));
}

STAGE(store_8888, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) <<  0
                         | SkNx_cast<int>(255.0f * g + 0.5f) <<  8
                         | SkNx_cast<int>(255.0f * b + 0.5f) << 16
                         | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
}

RGBA_XFERMODE(clear)    { return 0.0f; }
//RGBA_XFERMODE(src)      { return s; }  // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst)      { return d; }

RGBA_XFERMODE(srcatop)  { return s*da + d*inv(sa); }
RGBA_XFERMODE(srcin)    { return s * da; }
RGBA_XFERMODE(srcout)   { return s * inv(da); }
RGBA_XFERMODE(srcover)  { return SkNx_fma(d, inv(sa), s); }
RGBA_XFERMODE(dstatop)  { return srcatop_kernel(d,da,s,sa); }
RGBA_XFERMODE(dstin)    { return srcin_kernel  (d,da,s,sa); }
RGBA_XFERMODE(dstout)   { return srcout_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstover)  { return srcover_kernel(d,da,s,sa); }

RGBA_XFERMODE(modulate) { return s*d; }
RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
RGBA_XFERMODE(plus_)    { return s + d; }
RGBA_XFERMODE(screen)   { return s + d - s*d; }
RGBA_XFERMODE(xor_)     { return s*inv(da) + d*inv(sa); }

RGB_XFERMODE(colorburn) {
    return (d == da  ).thenElse(d + s*inv(da),
           (s == 0.0f).thenElse(s + d*inv(sa),
                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(colordodge) {
    return (d == 0.0f).thenElse(d + s*inv(da),
           (s == sa  ).thenElse(s + d*inv(sa),
                                sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(darken)     { return s + d - SkNf::Max(s*da, d*sa); }
RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
RGB_XFERMODE(exclusion)  { return s + d - 2.0f*s*d; }
RGB_XFERMODE(hardlight) {
    return s*inv(da) + d*inv(sa)
         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
}
RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
RGB_XFERMODE(softlight) {
    SkNf m  = (da > 0.0f).thenElse(d / da, 0.0f),
         s2 = 2.0f*s,
         m4 = 4.0f*m;

    // The logic forks three ways:
    //    1. dark src?
    //    2. light src, dark dst?
    //    3. light src, light dst?
    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)),     // Used in case 1.
         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m,  // Used in case 2.
         liteDst = m.rsqrt().invert() - m,            // Used in case 3.
         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst);  // 2 or 3?
    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc);            // 1 or (2 or 3)?
}

STAGE(luminance_to_alpha, true) {
    a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
    r = g = b = 0;
}

STAGE(matrix_2x3, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[2], m[4])),
         G = fma(r,m[1], fma(g,m[3], m[5]));
    r = R;
    g = G;
}

STAGE(matrix_3x4, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))),
         G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))),
         B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11])));
    r = R;
    g = G;
    b = B;
}

STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
    r = R;
    g = G;
    b = B;
    a = A;
}

STAGE(matrix_perspective, true) {
    // N.B. unlike the matrix_NxM stages, this takes a row-major matrix.
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[1], m[2])),
         G = fma(r,m[3], fma(g,m[4], m[5])),
         Z = fma(r,m[6], fma(g,m[7], m[8]));
    r = R * Z.invert();
    g = G * Z.invert();
}


SI SkNf parametric(const SkNf& v, const SkColorSpaceTransferFn& p) {
    float result[N];   // Unconstrained powf() doesn't vectorize well...
    for (int i = 0; i < N; i++) {
        float s = v[i];
        result[i] = (s <= p.fD) ? p.fE * s + p.fF
                                : powf(s * p.fA + p.fB, p.fG) + p.fC;
    }
    return SkNf::Load(result);
}
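// For reference (these values aren't used in this file): the sRGB transfer
// function fits this parametric form with roughly D=0.04045, E=1/12.92, F=0,
// A=1/1.055, B=0.055/1.055, G=2.4, C=0, i.e.
//     s <= 0.04045 ? s/12.92 : ((s + 0.055)/1.055)^2.4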

STAGE(parametric_r, true) {
    r = parametric(r, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_g, true) {
    g = parametric(g, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_b, true) {
    b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
}

SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {
    float result[N];
    for (int i = 0; i < N; i++) {
        result[i] = interp_lut(v[i], table.fData, table.fSize);
    }
    return SkNf::Load(result);
}

STAGE(table_r, true) {
    r = table(r, *(const SkTableTransferFn*)ctx);
}
STAGE(table_g, true) {
    g = table(g, *(const SkTableTransferFn*)ctx);
}
STAGE(table_b, true) {
    b = table(b, *(const SkTableTransferFn*)ctx);
}

STAGE(color_lookup_table, true) {
    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
    float rgb[3];
    float result[3][N];
    for (int i = 0; i < N; ++i) {
        rgb[0] = r[i];
        rgb[1] = g[i];
        rgb[2] = b[i];
        colorLUT->interp3D(rgb, rgb);
        result[0][i] = rgb[0];
        result[1][i] = rgb[1];
        result[2][i] = rgb[2];
    }
    r = SkNf::Load(result[0]);
    g = SkNf::Load(result[1]);
    b = SkNf::Load(result[2]);
}

STAGE(lab_to_xyz, true) {
    const auto lab_l = r * 100.0f;
    const auto lab_a = g * 255.0f - 128.0f;
    const auto lab_b = b * 255.0f - 128.0f;
    auto Y = (lab_l + 16.0f) * (1/116.0f);
    auto X = lab_a * (1/500.0f) + Y;
    auto Z = Y - (lab_b * (1/200.0f));

    const auto X3 = X*X*X;
    X = (X3 > 0.008856f).thenElse(X3, (X - (16/116.0f)) * (1/7.787f));
    const auto Y3 = Y*Y*Y;
    Y = (Y3 > 0.008856f).thenElse(Y3, (Y - (16/116.0f)) * (1/7.787f));
    const auto Z3 = Z*Z*Z;
    Z = (Z3 > 0.008856f).thenElse(Z3, (Z - (16/116.0f)) * (1/7.787f));

    // adjust to D50 illuminant
    X *= 0.96422f;
    Y *= 1.00000f;
    Z *= 0.82521f;

    r = X;
    g = Y;
    b = Z;
}
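// Note the inputs to lab_to_xyz arrive normalized to [0,1]: r encodes L/100,
// while g and b encode (a+128)/255 and (b+128)/255.  The first three lines
// recover the usual CIELAB ranges (L in [0,100], a and b in roughly
// [-128,127]) before converting to XYZ.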

STAGE(swap_rb, true) {
    SkTSwap(r, b);
}

SI SkNf assert_in_tile(const SkNf& v, float limit) {
    for (int i = 0; i < N; i++) {
        SkASSERT(0 <= v[i] && v[i] < limit);
    }
    return v;
}

SI SkNf clamp(const SkNf& v, float limit) {
    SkNf result = SkNf::Max(0, SkNf::Min(v, limit - 0.5f));
    return assert_in_tile(result, limit);
}

SI SkNf repeat(const SkNf& v, float limit) {
    SkNf result = v - (v/limit).floor()*limit;
    // For small negative v, (v/limit).floor()*limit can dominate v in the subtraction,
    // which leaves result == limit.  We want result < limit, so clamp it one ULP.
    result = SkNf::Min(result, nextafterf(limit, 0));
    return assert_in_tile(result, limit);
}

SI SkNf mirror(const SkNf& v, float l/*imit*/) {
    SkNf result = ((v - l) - ((v - l) / (2*l)).floor()*(2*l) - l).abs();
    // Same deal as repeat.
    result = SkNf::Min(result, nextafterf(l, 0));
    return assert_in_tile(result, l);
}
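// Worked example with limit == 4: clamp(5) == 3.5, repeat(5) == 5 - 1*4 == 1,
// and mirror(5) == |(5-4) - 0 - 4| == 3, i.e. the coordinate bounces back and
// forth across the tile instead of wrapping around.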

STAGE(clamp_x,  true) { r = clamp (r, *(const int*)ctx); }
STAGE(clamp_y,  true) { g = clamp (g, *(const int*)ctx); }
STAGE(repeat_x, true) { r = repeat(r, *(const int*)ctx); }
STAGE(repeat_y, true) { g = repeat(g, *(const int*)ctx); }
STAGE(mirror_x, true) { r = mirror(r, *(const int*)ctx); }
STAGE(mirror_y, true) { g = mirror(g, *(const int*)ctx); }


struct NearestCtx {
    const void* pixels;
    int stride;
};

STAGE(nearest_565, true) {}  // TODO
STAGE(nearest_f16, true) {}  // TODO

STAGE(nearest_8888, true) {
    auto nc = (const NearestCtx*)ctx;

    SkNi ix = SkNx_cast<int>(r),
         iy = SkNx_cast<int>(g);
    SkNi offset = iy*nc->stride + ix;

    auto p = (const uint32_t*)nc->pixels;
    uint8_t R[N], G[N], B[N], A[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            R[i] = G[i] = B[i] = A[i] = 0;
            continue;
        }
        uint32_t rgba = p[offset[i]];
        R[i] = rgba >>  0;
        G[i] = rgba >>  8;
        B[i] = rgba >> 16;
        A[i] = rgba >> 24;
    }

    r = SkNx_cast<float>(SkNb::Load(R)) * (1/255.0f);
    g = SkNx_cast<float>(SkNb::Load(G)) * (1/255.0f);
    b = SkNx_cast<float>(SkNb::Load(B)) * (1/255.0f);
    a = SkNx_cast<float>(SkNb::Load(A)) * (1/255.0f);
}

STAGE(nearest_srgb, true) {
    auto nc = (const NearestCtx*)ctx;

    SkNi ix = SkNx_cast<int>(r),
         iy = SkNx_cast<int>(g);
    SkNi offset = iy*nc->stride + ix;

    auto p = (const uint32_t*)nc->pixels;
    uint8_t R[N], G[N], B[N], A[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            R[i] = G[i] = B[i] = A[i] = 0;
            continue;
        }
        uint32_t rgba = p[offset[i]];
        R[i] = rgba >>  0;
        G[i] = rgba >>  8;
        B[i] = rgba >> 16;
        A[i] = rgba >> 24;
    }

    r = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(R)));
    g = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(G)));
    b = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(B)));
    a = SkNx_cast<float>(SkNb::Load(A)) * (1/255.0f);
}

template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
    switch (st) {
    #define M(stage) case SkRasterPipeline::stage: return stage;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
    }
    SkASSERT(false);
    return just_return;
}

namespace SK_OPTS_NS {

    struct Memset16 {
        uint16_t** dst;
        uint16_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset16(*dst + x, val, n); }
    };

    struct Memset32 {
        uint32_t** dst;
        uint32_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset32(*dst + x, val, n); }
    };

    struct Memset64 {
        uint64_t** dst;
        uint64_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset64(*dst + x, val, n); }
    };

    SI std::function<void(size_t, size_t, size_t)>
    compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
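        // Fast path: a pipeline that is just constant_color -> store can skip
        // the per-pixel stage machinery and become a single memset of the
        // packed color, using the Memset helpers above.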
        if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
            SkPM4f src = *(const SkPM4f*)stages[0].ctx;
            void* dst = stages[1].ctx;
            switch (stages[1].stage) {
                case SkRasterPipeline::store_565:
                    return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f,
                                                                 src.g() * SK_G16_MASK + 0.5f,
                                                                 src.b() * SK_B16_MASK + 0.5f)};
                case SkRasterPipeline::store_srgb:
                    return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())};

                case SkRasterPipeline::store_f16:
                    return Memset64{(uint64_t**)dst, src.toF16()};

                default: break;
            }
        }

        struct Compiled {
            Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
                if (nstages == 0) {
                    return;
                }

                fBodyStart = enum_to_Fn<Body>(stages[0].stage);
                fTailStart = enum_to_Fn<Tail>(stages[0].stage);
                for (int i = 0; i < nstages-1; i++) {
                    fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
                    fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
                    fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
                }
                fBody[nstages-1].next = just_return;
                fTail[nstages-1].next = just_return;
                fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
            }

            void operator()(size_t x, size_t y, size_t n) {
                SkNf v;  // Fastest to start uninitialized.

                float dx[] = { 0,1,2,3,4,5,6,7 };
                SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
                     Y = SkNf(y) + 0.5f;

                while (n >= N) {
                    fBodyStart(fBody, x, v,v,v,v, X,Y,v,v);
                    X += (float)N;
                    x += N;
                    n -= N;
                }
                if (n) {
                    fTailStart(fTail, x,n, v,v,v,v, X,Y,v,v);
                }
            }

            Body fBodyStart = just_return;
            Tail fTailStart = just_return;

            BodyStage fBody[SkRasterPipeline::kMaxStages];
            TailStage fTail[SkRasterPipeline::kMaxStages];

        } fn { stages, nstages };
        return fn;
    }
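
    // A caller-side usage sketch (the accessor names here are hypothetical):
    //
    //     SkRasterPipeline p;
    //     ... append stages to p ...
    //     auto fn = compile_pipeline(p.stages(), p.size());
    //     fn(x, y, n);   // process n pixels of the row starting at (x,y)
    //
    // The returned std::function runs the body N pixels at a time and hands
    // any remainder (n % N pixels) to the tail path.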

}  // namespace SK_OPTS_NS

#undef SI
#undef STAGE
#undef RGBA_XFERMODE
#undef RGB_XFERMODE

#endif//SkRasterPipeline_opts_DEFINED