blob: c6fbe6938aa362367dcf375ff16b90f1281ecd90 [file] [log] [blame]
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkRasterPipeline_opts_DEFINED
9#define SkRasterPipeline_opts_DEFINED
10
Mike Klein1f49f262016-10-31 19:49:27 -040011#include "SkColorPriv.h"
raftias25636012016-11-11 15:27:39 -080012#include "SkColorLookUpTable.h"
Matt Sarettdb4d4062016-11-16 16:07:15 -050013#include "SkColorSpaceXform_A2B.h"
14#include "SkColorSpaceXformPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040015#include "SkHalf.h"
Mike Klein46e66a22016-11-21 16:19:34 -050016#include "SkImageShaderContext.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040017#include "SkPM4f.h"
mtklein125b2aa2016-11-04 13:41:34 -070018#include "SkPM4fPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040019#include "SkRasterPipeline.h"
20#include "SkSRGB.h"
mtklein125b2aa2016-11-04 13:41:34 -070021#include "SkUtils.h"
Mike Klein2878e762016-10-19 21:05:17 -040022#include <utility>
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040023
namespace {

// Each stage processes N pixels at a time; pick the widest vector the CPU has.
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    static constexpr int N = 8;
#else
    static constexpr int N = 4;
#endif

    // N-wide vector aliases used throughout this file.
    using SkNf = SkNx<N, float>;     // color channels, in linear float
    using SkNi = SkNx<N, int>;
    using SkNh = SkNx<N, uint16_t>;  // half floats, 565, 4444 pixels
    using SkNb = SkNx<N, uint8_t>;

    struct BodyStage;
    struct TailStage;

    // A "body" stage handles a full vector of N pixels.  A "tail" stage handles
    // the final partial vector, with the live pixel count passed as 'tail'.
    // The 8 SkNf arguments are the source (r,g,b,a) and destination (dr..da)
    // color registers, threaded through the whole pipeline.
    using Body = void(SK_VECTORCALL *)(BodyStage*, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    struct BodyStage { Body next; void* ctx; };
    struct TailStage { Tail next; void* ctx; };

} // namespace
Mike Klein2878e762016-10-19 21:05:17 -040048
Mike Klein04adfda2016-10-12 09:52:55 -040049#define SI static inline
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040050
// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
SI void SK_VECTORCALL next(BodyStage* st, size_t x,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}
// Tail variant: also forwards the live pixel count.
SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
63
Mike Klein2878e762016-10-19 21:05:17 -040064
// STAGE(name, kCallNext) defines one pipeline stage in three pieces:
//   * name##_kernel<kIsTail>: the stage's logic, seeing its context pointer,
//     the pixel offset x, the tail count (0 for the body variant), and all
//     eight color registers by reference;
//   * a BodyStage overload that runs the kernel on a full vector of N pixels;
//   * a TailStage overload that runs it on the final 1..N-1 pixels.
// When kCallNext is true, each overload then forwards to the next stage
// (false is used by terminal stages like the stores).
#define STAGE(name, kCallNext)                                                           \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a,      \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                                  \
                               SkNf r, SkNf g, SkNf b, SkNf a,                           \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da);                        \
        if (kCallNext) {                                                                 \
            next(st, x, r,g,b,a, dr,dg,db,da);                                           \
        }                                                                                \
    }                                                                                    \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,                     \
                               SkNf r, SkNf g, SkNf b, SkNf a,                           \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {                     \
        name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da);                      \
        if (kCallNext) {                                                                 \
            next(st, x,tail, r,g,b,a, dr,dg,db,da);                                      \
        }                                                                                \
    }                                                                                    \
    template <bool kIsTail>                                                              \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,         \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a,      \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040090
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040091
// Many xfermodes apply the same logic to each channel.
// RGBA_XFERMODE(name) declares name##_kernel(s, sa, d, da) -> blended channel,
// plus body/tail stages that apply it to r, g, b, and also to a (passing
// s==sa and d==da for the alpha channel), then call the next stage.
#define RGBA_XFERMODE(name)                                                       \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da);    \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                           \
                               SkNf r, SkNf g, SkNf b, SkNf a,                    \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = name##_kernel(a,a,da,da);                                             \
        next(st, x, r,g,b,a, dr,dg,db,da);                                        \
    }                                                                             \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,              \
                               SkNf r, SkNf g, SkNf b, SkNf a,                    \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = name##_kernel(a,a,da,da);                                             \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                   \
    }                                                                             \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da)
Mike Klein9161ef02016-10-04 14:03:27 -0400116
// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
// RGB_XFERMODE(name) is like RGBA_XFERMODE but computes alpha as
// a + da*(1-a) (srcover) instead of running the kernel on it.
#define RGB_XFERMODE(name)                                                        \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da);    \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                           \
                               SkNf r, SkNf g, SkNf b, SkNf a,                    \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = a + (da * (1.0f-a));                                                  \
        next(st, x, r,g,b,a, dr,dg,db,da);                                        \
    }                                                                             \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,              \
                               SkNf r, SkNf g, SkNf b, SkNf a,                    \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {              \
        r = name##_kernel(r,a,dr,da);                                             \
        g = name##_kernel(g,a,dg,da);                                             \
        b = name##_kernel(b,a,db,da);                                             \
        a = a + (da * (1.0f-a));                                                  \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                                   \
    }                                                                             \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa,     \
                                               const SkNf& d, const SkNf& da)
141
Mike Kleinaebfb452016-10-25 10:27:33 -0400142SI SkNf inv(const SkNf& x) { return 1.0f - x; }
143
144SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
145 return SkNx_fma(to-from, cov, from);
146}
147
// Load up to N values of type T.  When kIsTail is true, 'tail' (1..N-1) is the
// number of valid values; the rest are zero-padded via a stack buffer so a
// full-width vector load is always safe.
template <bool kIsTail, typename T>
SI SkNx<N,T> load(size_t tail, const T* src) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskload for 32- and 64-bit T
    if (kIsTail) {
        T buf[8] = {0};
        // N.B. deliberate fallthrough: copies elements tail-1 down to 1.
        switch (tail & (N-1)) {
            case 7: buf[6] = src[6];
            case 6: buf[5] = src[5];
            case 5: buf[4] = src[4];
            case 4: buf[3] = src[3];
            case 3: buf[2] = src[2];
            case 2: buf[1] = src[1];
        }
        buf[0] = src[0];  // tail >= 1, so element 0 is always valid.
        return SkNx<N,T>::Load(buf);
    }
    return SkNx<N,T>::Load(src);
}

// Store up to N values of type T; the tail path writes only the first 'tail'
// lanes so we never touch memory past the end of the row.
template <bool kIsTail, typename T>
SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskstore for 32- and 64-bit T
    if (kIsTail) {
        // N.B. deliberate fallthrough, mirroring load() above.
        switch (tail & (N-1)) {
            case 7: dst[6] = v[6];
            case 6: dst[5] = v[5];
            case 5: dst[4] = v[4];
            case 4: dst[3] = v[3];
            case 3: dst[2] = v[2];
            case 2: dst[1] = v[1];
        }
        dst[0] = v[0];
        return;
    }
    v.store(dst);
}
186
// Unpack 4444 pixels into four float channels in [0,1].
// Masking in place and dividing by the in-place mask avoids a shift per channel.
SI void from_4444(const SkNh& _4444, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
    auto _32_bit = SkNx_cast<int>(_4444);

    *r = SkNx_cast<float>(_32_bit & (0xF << SK_R4444_SHIFT)) * (1.0f / (0xF << SK_R4444_SHIFT));
    *g = SkNx_cast<float>(_32_bit & (0xF << SK_G4444_SHIFT)) * (1.0f / (0xF << SK_G4444_SHIFT));
    *b = SkNx_cast<float>(_32_bit & (0xF << SK_B4444_SHIFT)) * (1.0f / (0xF << SK_B4444_SHIFT));
    *a = SkNx_cast<float>(_32_bit & (0xF << SK_A4444_SHIFT)) * (1.0f / (0xF << SK_A4444_SHIFT));
}

// Unpack 565 pixels into three float channels in [0,1] (565 has no alpha).
SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
    auto _32_bit = SkNx_cast<int>(_565);

    *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
    *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
    *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
}

// Pack three [0,1] float channels into 565 pixels, rounding with +0.5.
SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
    return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
                              | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
                              | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
}
209
// Terminal stage that does nothing and does not call next(); ends a pipeline.
STAGE(just_return, false) { }

// Debug stage: prints the string stored in ctx, then continues.
STAGE(trace, true) {
    SkDebugf("%s\n", (const char*)ctx);
}

// Debug stage: dumps all eight color registers, one lane per column.
STAGE(registers, true) {
    auto print = [](const char* name, const SkNf& v) {
        SkDebugf("%s:", name);
        for (int i = 0; i < N; i++) {
            SkDebugf(" %g", v[i]);
        }
        SkDebugf("\n");
    };
    print(" r", r);
    print(" g", g);
    print(" b", b);
    print(" a", a);
    print("dr", dr);
    print("dg", dg);
    print("db", db);
    print("da", da);
}
233
Mike Klein130863e2016-10-27 11:29:36 -0400234STAGE(clamp_0, true) {
235 a = SkNf::Max(a, 0.0f);
236 r = SkNf::Max(r, 0.0f);
237 g = SkNf::Max(g, 0.0f);
238 b = SkNf::Max(b, 0.0f);
239}
Mike Klein130863e2016-10-27 11:29:36 -0400240
Mike Kleineea7c162016-11-03 10:20:35 -0400241STAGE(clamp_a, true) {
Mike Klein130863e2016-10-27 11:29:36 -0400242 a = SkNf::Min(a, 1.0f);
243 r = SkNf::Min(r, a);
244 g = SkNf::Min(g, a);
245 b = SkNf::Min(b, a);
246}
247
Matt Sarettdb4d4062016-11-16 16:07:15 -0500248STAGE(clamp_1, true) {
249 a = SkNf::Min(a, 1.0f);
250 r = SkNf::Min(r, 1.0f);
251 g = SkNf::Min(g, 1.0f);
252 b = SkNf::Min(b, 1.0f);
253}
254
Mike Kleineea7c162016-11-03 10:20:35 -0400255STAGE(unpremul, true) {
256 r *= a.invert();
257 g *= a.invert();
258 b *= a.invert();
259}
260
261STAGE(premul, true) {
262 r *= a;
263 g *= a;
264 b *= a;
265}
266
Mike Kleinc5093412016-11-04 16:36:39 -0400267STAGE(move_src_dst, true) {
268 dr = r;
269 dg = g;
270 db = b;
271 da = a;
Mike Kleinaebfb452016-10-25 10:27:33 -0400272}
273
Mike Kleinfb191da2016-11-15 13:20:33 -0500274STAGE(swap_src_dst, true) {
275 SkTSwap(r, dr);
276 SkTSwap(g, dg);
277 SkTSwap(b, db);
278 SkTSwap(a, da);
279}
280
Mike Kleinaebfb452016-10-25 10:27:33 -0400281// The default shader produces a constant color (from the SkPaint).
282STAGE(constant_color, true) {
283 auto color = (const SkPM4f*)ctx;
284 r = color->r();
285 g = color->g();
286 b = color->b();
287 a = color->a();
288}
289
Mike Klein66866172016-11-03 12:22:01 -0400290// s' = sc for a constant c.
291STAGE(scale_constant_float, true) {
292 SkNf c = *(const float*)ctx;
293
294 r *= c;
295 g *= c;
296 b *= c;
297 a *= c;
298}
299
Mike Kleinaebfb452016-10-25 10:27:33 -0400300// s' = d(1-c) + sc, for a constant c.
301STAGE(lerp_constant_float, true) {
302 SkNf c = *(const float*)ctx;
303
304 r = lerp(dr, r, c);
305 g = lerp(dg, g, c);
306 b = lerp(db, b, c);
307 a = lerp(da, a, c);
308}
309
310// s' = sc for 8-bit c.
311STAGE(scale_u8, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400312 auto ptr = *(const uint8_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400313
314 SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
315 r = r*c;
316 g = g*c;
317 b = b*c;
318 a = a*c;
319}
320
321// s' = d(1-c) + sc for 8-bit c.
322STAGE(lerp_u8, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400323 auto ptr = *(const uint8_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400324
325 SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
326 r = lerp(dr, r, c);
327 g = lerp(dg, g, c);
328 b = lerp(db, b, c);
329 a = lerp(da, a, c);
330}
331
332// s' = d(1-c) + sc for 565 c.
333STAGE(lerp_565, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400334 auto ptr = *(const uint16_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400335 SkNf cr, cg, cb;
336 from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);
337
338 r = lerp(dr, r, cr);
339 g = lerp(dg, g, cg);
340 b = lerp(db, b, cb);
341 a = 1.0f;
342}
343
344STAGE(load_d_565, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400345 auto ptr = *(const uint16_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400346 from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
347 da = 1.0f;
348}
349
350STAGE(load_s_565, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400351 auto ptr = *(const uint16_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400352 from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
353 a = 1.0f;
354}
355
356STAGE(store_565, false) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400357 auto ptr = *(uint16_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400358 store<kIsTail>(tail, to_565(r,g,b), ptr);
359}
360
// Load destination pixels from interleaved RGBA half floats.
STAGE(load_d_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        // Copy the valid pixels into a zero-padded buffer so Load4 can
        // deinterleave a full vector.  N.B. deliberate switch fallthrough.
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    // finite_ftz: non-finite halves and denormals flush to zero.
    dr = SkHalfToFloat_finite_ftz(rh);
    dg = SkHalfToFloat_finite_ftz(gh);
    db = SkHalfToFloat_finite_ftz(bh);
    da = SkHalfToFloat_finite_ftz(ah);
}

// Same as load_d_f16, but into the source registers.
STAGE(load_s_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        // N.B. deliberate switch fallthrough (see load_d_f16).
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];
            case 6: buf[5] = ptr[5];
            case 5: buf[4] = ptr[4];
            case 4: buf[3] = ptr[3];
            case 3: buf[2] = ptr[2];
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    r = SkHalfToFloat_finite_ftz(rh);
    g = SkHalfToFloat_finite_ftz(gh);
    b = SkHalfToFloat_finite_ftz(bh);
    a = SkHalfToFloat_finite_ftz(ah);
}

// Store interleaved RGBA half floats.  In the tail, Store4 writes into a
// scratch buffer and only the valid pixels are copied out.  Terminal stage.
STAGE(store_f16, false) {
    auto ptr = *(uint64_t**)ctx + x;

    uint64_t buf[8];
    SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
                                      SkFloatToHalf_finite_ftz(g),
                                      SkFloatToHalf_finite_ftz(b),
                                      SkFloatToHalf_finite_ftz(a));
    if (kIsTail) {
        // N.B. deliberate switch fallthrough.
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}

// Store interleaved RGBA floats (SkPM4f), with the same tail strategy
// as store_f16.  Terminal stage.
STAGE(store_f32, false) {
    auto ptr = *(SkPM4f**)ctx + x;

    SkPM4f buf[8];
    SkNf::Store4(kIsTail ? buf : ptr, r,g,b,a);
    if (kIsTail) {
        // N.B. deliberate switch fallthrough.
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}
451
Mike Kleinaebfb452016-10-25 10:27:33 -0400452
// Load 8-bit SkPMColor-order sRGB.
STAGE(load_d_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    // Reinterpret the N uint32 lanes as SkNi (same bits) so they can feed
    // sk_linear_from_srgb_math, which linearizes 8-bit sRGB values.
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    // Alpha is linear already; just rescale to [0,1].
    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

// Same as load_d_srgb, but into the source registers.
STAGE(load_s_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

// Re-encode to 8-bit sRGB and pack in SkPMColor order.  Terminal stage.
STAGE(store_srgb, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT
                         | sk_linear_to_srgb(g) << SK_G32_SHIFT
                         | sk_linear_to_srgb(b) << SK_B32_SHIFT
                         | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}

// Load fixed-order RGBA 8888 pixels (no sRGB decoding; just rescale to [0,1]).
STAGE(load_s_8888, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = (1/255.0f)*SkNx_cast<float>(to_int((px >> 0) & 0xff));
    g = (1/255.0f)*SkNx_cast<float>(to_int((px >> 8) & 0xff));
    b = (1/255.0f)*SkNx_cast<float>(to_int((px >> 16) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int(px >> 24));
}

// Store fixed-order RGBA 8888, rounding each channel with +0.5.  Terminal stage.
STAGE(store_8888, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 0
                         | SkNx_cast<int>(255.0f * g + 0.5f) << 8
                         | SkNx_cast<int>(255.0f * b + 0.5f) << 16
                         | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
}
502
// Porter-Duff compositing kernels, expressed per channel on premultiplied
// color: s/sa = source channel/alpha, d/da = destination channel/alpha.
RGBA_XFERMODE(clear) { return 0.0f; }
//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst) { return d; }

RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); }
RGBA_XFERMODE(srcin) { return s * da; }
RGBA_XFERMODE(srcout) { return s * inv(da); }
RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); }
// The dst* modes are the src* modes with source and destination swapped.
RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); }
RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); }

RGBA_XFERMODE(modulate) { return s*d; }
RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
RGBA_XFERMODE(plus_) { return s + d; }
RGBA_XFERMODE(screen) { return s + d - s*d; }
RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); }
521
// Separable blend modes (color channels only; alpha uses srcover via
// RGB_XFERMODE).  The thenElse guards avoid dividing by zero at the
// branch points of the piecewise definitions.
RGB_XFERMODE(colorburn) {
    return (d == da ).thenElse(d + s*inv(da),
           (s == 0.0f).thenElse(s + d*inv(sa),
                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(colordodge) {
    return (d == 0.0f).thenElse(d + s*inv(da),
           (s == sa  ).thenElse(s + d*inv(sa),
                                sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); }
RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; }
RGB_XFERMODE(hardlight) {
    return s*inv(da) + d*inv(sa)
         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
}
RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
// overlay is hardlight with source and destination exchanged.
RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
RGB_XFERMODE(softlight) {
    SkNf m  = (da > 0.0f).thenElse(d / da, 0.0f),
         s2 = 2.0f*s,
         m4 = 4.0f*m;

    // The logic forks three ways:
    //    1. dark src?
    //    2. light src, dark dst?
    //    3. light src, light dst?
    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)),     // Used in case 1.
         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m,  // Used in case 2.
         liteDst = m.rsqrt().invert() - m,            // Used in case 3 (i.e. sqrt(m) - m).
         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst);  // 2 or 3?
    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc);            // 1 or (2 or 3)?
}
556
Mike Klein1f49f262016-10-31 19:49:27 -0400557STAGE(luminance_to_alpha, true) {
558 a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
559 r = g = b = 0;
560}
561
// Apply a column-major 2x3 matrix (2x2 + translate) to (r,g), used for
// coordinate transforms.  Results are computed into temporaries first so
// the updated r is not read when computing g.
STAGE(matrix_2x3, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[2], m[4])),
         G = fma(r,m[1], fma(g,m[3], m[5]));
    r = R;
    g = G;
}

// Apply a column-major 3x4 matrix (3x3 + translate) to (r,g,b).
STAGE(matrix_3x4, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))),
         G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))),
         B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11])));
    r = R;
    g = G;
    b = B;
}

// Apply a column-major 4x5 matrix (4x4 + translate) to (r,g,b,a),
// e.g. a color matrix filter.
STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
    r = R;
    g = G;
    b = B;
    a = A;
}
Mike Kleinaebfb452016-10-25 10:27:33 -0400597
Mike Kleinc01e7df2016-11-17 16:27:10 -0500598STAGE(matrix_perspective, true) {
599 // N.B. unlike the matrix_NxM stages, this takes a row-major matrix.
600 auto m = (const float*)ctx;
601
602 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
603 auto R = fma(r,m[0], fma(g,m[1], m[2])),
604 G = fma(r,m[3], fma(g,m[4], m[5])),
605 Z = fma(r,m[6], fma(g,m[7], m[8]));
606 r = R * Z.invert();
607 g = G * Z.invert();
608}
609
610
// Apply a parametric transfer function: linear segment E*s + F below the
// breakpoint D, power segment (A*s + B)^G + C above it.  Evaluated one lane
// at a time because unconstrained powf() doesn't vectorize well.
SI SkNf parametric(const SkNf& v, const SkColorSpaceTransferFn& p) {
    float result[N];   // Unconstrained powf() doesn't vectorize well...
    for (int i = 0; i < N; i++) {
        float s = v[i];
        result[i] = (s <= p.fD) ? p.fE * s + p.fF
                                : powf(s * p.fA + p.fB, p.fG) + p.fC;
    }
    return SkNf::Load(result);
}

// Per-channel parametric transfer function stages.
STAGE(parametric_r, true) {
    r = parametric(r, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_g, true) {
    g = parametric(g, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_b, true) {
    b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
}
630
// Apply a tabulated transfer function via linear interpolation into the
// table, one lane at a time.
SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {
    float result[N];
    for (int i = 0; i < N; i++) {
        result[i] = interp_lut(v[i], table.fData, table.fSize);
    }
    return SkNf::Load(result);
}

// Per-channel table transfer function stages.
STAGE(table_r, true) {
    r = table(r, *(const SkTableTransferFn*)ctx);
}
STAGE(table_g, true) {
    g = table(g, *(const SkTableTransferFn*)ctx);
}
STAGE(table_b, true) {
    b = table(b, *(const SkTableTransferFn*)ctx);
}
648
// Run (r,g,b) through a 3D color lookup table, one lane at a time
// (interp3D works on scalar triples).  Alpha is left untouched.
STAGE(color_lookup_table, true) {
    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
    float rgb[3];
    float result[3][N];
    for (int i = 0; i < N; ++i) {
        rgb[0] = r[i];
        rgb[1] = g[i];
        rgb[2] = b[i];
        colorLUT->interp3D(rgb, rgb);   // in-place is safe: output overwrites input.
        result[0][i] = rgb[0];
        result[1][i] = rgb[1];
        result[2][i] = rgb[2];
    }
    r = SkNf::Load(result[0]);
    g = SkNf::Load(result[1]);
    b = SkNf::Load(result[2]);
}
666
// Convert CIELAB (packed into r,g,b as L/100, (a+128)/255, (b+128)/255)
// to XYZ, adapted to the D50 illuminant.
STAGE(lab_to_xyz, true) {
    // Unpack to the usual Lab ranges: L in [0,100], a/b in [-128,127].
    const auto lab_l = r * 100.0f;
    const auto lab_a = g * 255.0f - 128.0f;
    const auto lab_b = b * 255.0f - 128.0f;
    auto Y = (lab_l + 16.0f) * (1/116.0f);
    auto X = lab_a * (1/500.0f) + Y;
    auto Z = Y - (lab_b * (1/200.0f));

    // Piecewise inverse of the Lab f() function: cube above the 0.008856
    // threshold, linear below it.
    const auto X3 = X*X*X;
    X = (X3 > 0.008856f).thenElse(X3, (X - (16/116.0f)) * (1/7.787f));
    const auto Y3 = Y*Y*Y;
    Y = (Y3 > 0.008856f).thenElse(Y3, (Y - (16/116.0f)) * (1/7.787f));
    const auto Z3 = Z*Z*Z;
    Z = (Z3 > 0.008856f).thenElse(Z3, (Z - (16/116.0f)) * (1/7.787f));

    // adjust to D50 illuminant
    X *= 0.96422f;
    Y *= 1.00000f;
    Z *= 0.82521f;

    r = X;
    g = Y;
    b = Z;
}
691
// Exchange the red and blue channels (e.g. RGBA <-> BGRA order fixup).
STAGE(swap_rb, true) {
    SkTSwap(r, b);
}
695
// Debug check that every lane of v lies in [0, limit).
SI SkNf assert_in_tile(const SkNf& v, float limit) {
    for (int i = 0; i < N; i++) {
        SkASSERT(0 <= v[i] && v[i] < limit);
    }
    return v;
}

// Clamp tiling: pin v into [0, limit-0.5]; the half-pixel margin keeps the
// +0.5 offsets used by the sampling stages in bounds.
SI SkNf clamp(const SkNf& v, float limit) {
    SkNf result = SkNf::Max(0, SkNf::Min(v, limit - 0.5f));
    return assert_in_tile(result, limit);
}

// Repeat tiling: v modulo limit.
SI SkNf repeat(const SkNf& v, float limit) {
    SkNf result = v - (v/limit).floor()*limit;
    // For small negative v, (v/limit).floor()*limit can dominate v in the subtraction,
    // which leaves result == limit.  We want result < limit, so clamp it one ULP.
    result = SkNf::Min(result, nextafterf(limit, 0));
    return assert_in_tile(result, limit);
}

// Mirror tiling: reflect v back and forth so it lands in [0, limit).
SI SkNf mirror(const SkNf& v, float l/*imit*/) {
    SkNf result = ((v - l) - ((v - l) / (2*l)).floor()*(2*l) - l).abs();
    // Same deal as repeat.
    result = SkNf::Min(result, nextafterf(l, 0));
    return assert_in_tile(result, l);
}

// Tiling stages: ctx holds the tile dimension as an int, converted to float
// at the call.  x coordinates live in r, y coordinates in g.
STAGE(clamp_x, true) { r = clamp (r, *(const int*)ctx); }
STAGE(clamp_y, true) { g = clamp (g, *(const int*)ctx); }
STAGE(repeat_x, true) { r = repeat(r, *(const int*)ctx); }
STAGE(repeat_y, true) { g = repeat(g, *(const int*)ctx); }
STAGE(mirror_x, true) { r = mirror(r, *(const int*)ctx); }
STAGE(mirror_y, true) { g = mirror(g, *(const int*)ctx); }
Mike Klein06a65e22016-11-17 12:39:09 -0500729
// Bilinear sampling corner stages.  Here r,g carry sample coordinates and b
// carries the corner's bilinear weight for the accum_* stages that follow.
// top_left runs first: it saves the original (x,y) into the shader context
// so the other three corners can reload them.
STAGE(top_left, true) {
    auto sc = (SkImageShaderContext*)ctx;

    r.store(sc->x);
    g.store(sc->y);

    // Shift to the top-left neighboring pixel center.
    r -= 0.5f;
    g -= 0.5f;

    // Weight is the product of the complementary fractional offsets.
    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * (1.0f - fy);
};

STAGE(top_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    // Reload the saved coordinates and shift toward the top-right pixel.
    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) - 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * (1.0f - fy);
};

STAGE(bottom_left, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) - 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * fy;
};

STAGE(bottom_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * fy;
};
776
// Compute per-lane pixel offsets (y*stride + x) from the float coordinates
// and hand back the image's base pixel pointer.  T is deduced from the
// caller's pointer; call sites pass const-qualified T, so the (const T*)
// cast matches their T** exactly.
template <typename T>
SI SkNi offset_and_ptr(T** ptr, const void* ctx, const SkNf& x, const SkNf& y) {
    auto sc = (const SkImageShaderContext*)ctx;

    SkNi ix = SkNx_cast<int>(x),
         iy = SkNx_cast<int>(y);
    SkNi offset = iy*sc->stride + ix;

    *ptr = (const T*)sc->pixels;
    return offset;
}
788
// Unimplemented gather/accumulate stages for A8 and indexed-8 pixels.
STAGE(accum_a8, true) {} // TODO

STAGE(accum_i8, true) {} // TODO
STAGE(accum_i8_srgb, true) {} // TODO
793
// Gather N 8-bit gray pixels at the coordinates in (r,g) and accumulate them
// into the destination registers, weighted by the bilinear corner weight in b
// (set by the top_left/top_right/bottom_left/bottom_right stages).
STAGE(accum_g8, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;   // Lanes past the tail contribute zero.
            continue;
        }
        px[i] = p[offset[i]];
    }

    SkNf gray = SkNx_cast<float>(SkNb::Load(px)) * (1/255.0f);

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}
// As accum_g8, but the gray values are sRGB-encoded and are linearized first.
STAGE(accum_g8_srgb, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;
            continue;
        }
        px[i] = p[offset[i]];
    }

    SkNf gray = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(px)));

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}
836
// Gather N 565 pixels at (r,g) and accumulate them into the destination
// registers, weighted by the bilinear corner weight in b.  565 has no alpha,
// so only the weight itself is added to da.
STAGE(accum_565, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;   // Lanes past the tail contribute zero.
            continue;
        }
        px[i] = p[offset[i]];
    }
    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale;
}
// As accum_565, but the channels are sRGB-encoded and are linearized first.
STAGE(accum_565_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;
            continue;
        }
        px[i] = p[offset[i]];
    }
    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale;
}
879
// Gather N 4444 pixels at (r,g) and accumulate them into the destination
// registers, weighted by the bilinear corner weight in b.
STAGE(accum_4444, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;   // Lanes past the tail contribute zero.
            continue;
        }
        px[i] = p[offset[i]];
    }

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale * A;
}
// As accum_4444, but color channels are sRGB-encoded and are linearized
// first; alpha stays linear.
STAGE(accum_4444_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            px[i] = 0;
            continue;
        }
        px[i] = p[offset[i]];
    }

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale * A;
}
Mike Kleincb5338c2016-11-22 14:58:45 -0500924
// Gather N RGBA 8888 pixels at (r,g), split them into per-channel byte
// arrays, and accumulate into the destination registers weighted by the
// bilinear corner weight in b.
STAGE(accum_8888, true) {
    const uint32_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t R[N], G[N], B[N], A[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            R[i] = G[i] = B[i] = A[i] = 0;   // Lanes past the tail contribute zero.
            continue;
        }
        uint32_t rgba = p[offset[i]];
        R[i] = rgba >>  0;
        G[i] = rgba >>  8;
        B[i] = rgba >> 16;
        A[i] = rgba >> 24;
    }

    SkNf scale = b;
    dr += scale * SkNx_cast<float>(SkNb::Load(R)) * (1/255.0f);
    dg += scale * SkNx_cast<float>(SkNb::Load(G)) * (1/255.0f);
    db += scale * SkNx_cast<float>(SkNb::Load(B)) * (1/255.0f);
    da += scale * SkNx_cast<float>(SkNb::Load(A)) * (1/255.0f);
}
// As accum_8888, but color channels are sRGB-encoded and are linearized
// first; alpha stays linear.
STAGE(accum_8888_srgb, true) {
    const uint32_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t R[N], G[N], B[N], A[N];
    for (size_t i = 0; i < N; i++) {
        if (kIsTail && i >= tail) {
            R[i] = G[i] = B[i] = A[i] = 0;
            continue;
        }
        uint32_t rgba = p[offset[i]];
        R[i] = rgba >>  0;
        G[i] = rgba >>  8;
        B[i] = rgba >> 16;
        A[i] = rgba >> 24;
    }

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(R)));
    dg += scale * sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(G)));
    db += scale * sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(B)));
    da += scale * SkNx_cast<float>(SkNb::Load(A)) * (1/255.0f);
}
Mike Kleincb2c12b2016-11-22 13:22:48 -0500971
972STAGE(accum_f16, true) {
973 const uint64_t* p;
974 SkNi offset = offset_and_ptr(&p, ctx, r, g);
975
976 uint16_t R[N], G[N], B[N], A[N];
977 for (size_t i = 0; i < N; i++) {
978 if (kIsTail && i >= tail) {
979 R[i] = G[i] = B[i] = A[i] = 0;
980 continue;
981 }
982 uint64_t rgba = p[offset[i]];
983 R[i] = rgba >> 0;
984 G[i] = rgba >> 16;
985 B[i] = rgba >> 32;
986 A[i] = rgba >> 48;
987 }
988 SkNf scale = b;
989 dr += scale * SkHalfToFloat_finite_ftz(SkNh::Load(R));
990 dg += scale * SkHalfToFloat_finite_ftz(SkNh::Load(G));
991 db += scale * SkHalfToFloat_finite_ftz(SkNh::Load(B));
992 da += scale * SkHalfToFloat_finite_ftz(SkNh::Load(A));
993}
994
Mike Klein06a65e22016-11-17 12:39:09 -0500995
// Maps a StockStage enum value to the stage function of the requested pointer
// type (Body or Tail).  The M macro expands SK_RASTER_PIPELINE_STAGES into one
// case per stage, each returning the identically-named function defined above.
template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
    switch (st) {
    #define M(stage) case SkRasterPipeline::stage: return stage;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
    }
    // Every valid enum value is handled above; reaching here is a bug.
    SkASSERT(false);
    return just_return;
}
Mike Klein9161ef02016-10-04 14:03:27 -04001006
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001007namespace SK_OPTS_NS {
1008
Mike Kleinad48a702016-11-07 17:16:21 -05001009 struct Memset16 {
1010 uint16_t** dst;
1011 uint16_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -05001012 void operator()(size_t x, size_t, size_t n) { sk_memset16(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -05001013 };
1014
1015 struct Memset32 {
1016 uint32_t** dst;
1017 uint32_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -05001018 void operator()(size_t x, size_t, size_t n) { sk_memset32(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -05001019 };
1020
1021 struct Memset64 {
1022 uint64_t** dst;
1023 uint64_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -05001024 void operator()(size_t x, size_t, size_t n) { sk_memset64(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -05001025 };
1026
    // Compiles a stage list into a callable (x, y, n) that shades n pixels of
    // the row starting at (x,y).
    SI std::function<void(size_t, size_t, size_t)>
    compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
        // Fast path: a constant color feeding directly into a store stage is
        // just a memset of the pre-packed color.
        if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
            SkPM4f src = *(const SkPM4f*)stages[0].ctx;
            void* dst = stages[1].ctx;
            switch (stages[1].stage) {
                case SkRasterPipeline::store_565:
                    // +0.5f rounds to nearest when packing to 565.
                    return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f,
                                                                 src.g() * SK_G16_MASK + 0.5f,
                                                                 src.b() * SK_B16_MASK + 0.5f)};
                case SkRasterPipeline::store_srgb:
                    return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())};

                case SkRasterPipeline::store_f16:
                    return Memset64{(uint64_t**)dst, src.toF16()};

                default: break;
            }
        }

        // General path: wire the stages into two parallel chains, one for
        // full N-pixel bodies and one for partial tails.
        struct Compiled {
            // Entry i pairs stage i's ctx with stage i+1's function: a running
            // stage reads its own context, then hands off to the next stage's
            // function.  The last entry chains into just_return to stop.
            Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
                if (nstages == 0) {
                    return;
                }

                fBodyStart = enum_to_Fn<Body>(stages[0].stage);
                fTailStart = enum_to_Fn<Tail>(stages[0].stage);
                for (int i = 0; i < nstages-1; i++) {
                    fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
                    fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
                    fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
                }
                fBody[nstages-1].next = just_return;
                fTail[nstages-1].next = just_return;
                fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
            }

            // Runs the pipeline: full N-pixel vectors through the body chain,
            // then at most one tail call for any remaining n < N pixels.
            void operator()(size_t x, size_t y, size_t n) {
                // dx covers the widest supported vector (8 lanes);
                // SkNf::Load reads only the first N floats.
                float dx[] = { 0,1,2,3,4,5,6,7 };
                // X,Y start at pixel centers, hence the +0.5f.
                SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
                     Y = SkNf(y) + 0.5f,
                     _0 = SkNf(0),
                     _1 = SkNf(1);

                while (n >= N) {
                    fBodyStart(fBody, x, X,Y,_1,_0, _0,_0,_0,_0);
                    X += (float)N;
                    x += N;
                    n -= N;
                }
                if (n) {
                    fTailStart(fTail, x,n, X,Y,_1,_0, _0,_0,_0,_0);
                }
            }

            // Empty pipelines fall straight through to just_return.
            Body fBodyStart = just_return;
            Tail fTailStart = just_return;

            BodyStage fBody[SkRasterPipeline::kMaxStages];
            TailStage fTail[SkRasterPipeline::kMaxStages];

        } fn { stages, nstages };
        return fn;
    }
1092
Mike Kleinaebfb452016-10-25 10:27:33 -04001093} // namespace SK_OPTS_NS
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001094
Mike Klein04adfda2016-10-12 09:52:55 -04001095#undef SI
1096#undef STAGE
1097#undef RGBA_XFERMODE
1098#undef RGB_XFERMODE
Mike Klein9161ef02016-10-04 14:03:27 -04001099
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001100#endif//SkRasterPipeline_opts_DEFINED