blob: 6033952ea878971456158357fbc3eb342095fe4e [file] [log] [blame]
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkRasterPipeline_opts_DEFINED
9#define SkRasterPipeline_opts_DEFINED
10
Mike Klein1f49f262016-10-31 19:49:27 -040011#include "SkColorPriv.h"
raftias25636012016-11-11 15:27:39 -080012#include "SkColorLookUpTable.h"
Matt Sarettdb4d4062016-11-16 16:07:15 -050013#include "SkColorSpaceXform_A2B.h"
14#include "SkColorSpaceXformPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040015#include "SkHalf.h"
Mike Klein46e66a22016-11-21 16:19:34 -050016#include "SkImageShaderContext.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040017#include "SkPM4f.h"
mtklein125b2aa2016-11-04 13:41:34 -070018#include "SkPM4fPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040019#include "SkRasterPipeline.h"
20#include "SkSRGB.h"
mtklein125b2aa2016-11-04 13:41:34 -070021#include "SkUtils.h"
Mike Klein2878e762016-10-19 21:05:17 -040022#include <utility>
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040023
namespace {

// Each pipeline stage processes N pixels at a time. With AVX2 we have
// 256-bit registers, so 8 float lanes; otherwise 4.
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    static constexpr int N = 8;
#else
    static constexpr int N = 4;
#endif

    using SkNf = SkNx<N, float>;     // N-wide float vector (one color channel).
    using SkNi = SkNx<N, int32_t>;   // N-wide signed 32-bit vector.
    using SkNu = SkNx<N, uint32_t>;  // N-wide unsigned 32-bit vector.
    using SkNh = SkNx<N, uint16_t>;  // N-wide 16-bit vector (halfs, 565, 4444).
    using SkNb = SkNx<N, uint8_t>;   // N-wide 8-bit vector.

    struct BodyStage;
    struct TailStage;

    // A Body function handles a full vector of N pixels; a Tail function
    // handles the final partial run and so also carries `tail`, the count
    // of remaining pixels (1..N-1).
    using Body = void(SK_VECTORCALL *)(BodyStage*, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    struct BodyStage { Body next; void* ctx; };
    struct TailStage { Tail next; void* ctx; };

}  // namespace
Mike Klein2878e762016-10-19 21:05:17 -040049
#define SI static inline

// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
SI void SK_VECTORCALL next(BodyStage* st, size_t x,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}
SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
64
Mike Klein2878e762016-10-19 21:05:17 -040065
// STAGE(name, kCallNext) declares one pipeline stage.  The user writes the
// body of name##_kernel<kIsTail>, shared by two generated wrappers:
//   - a BodyStage function, handling a full vector of N pixels (tail == 0);
//   - a TailStage function, handling the final 1..N-1 pixels (tail > 0).
// When kCallNext is true the wrapper then calls the next stage in the
// pipeline; stages that end the pipeline (e.g. stores) pass false.
#define STAGE(name, kCallNext)                                                     \
    template <bool kIsTail>                                                        \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,   \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                            \
                               SkNf r, SkNf g, SkNf b, SkNf a,                     \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {               \
        name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da);                  \
        if (kCallNext) {                                                           \
            next(st, x, r,g,b,a, dr,dg,db,da);                                     \
        }                                                                          \
    }                                                                              \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,               \
                               SkNf r, SkNf g, SkNf b, SkNf a,                     \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {               \
        name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da);                \
        if (kCallNext) {                                                           \
            next(st, x,tail, r,g,b,a, dr,dg,db,da);                                \
        }                                                                          \
    }                                                                              \
    template <bool kIsTail>                                                        \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail,   \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040091
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040092
// Many xfermodes apply the same logic to each channel.
// RGBA_XFERMODE(name) generates Body and Tail stages that apply the user's
// name##_kernel(s, sa, d, da) uniformly to r, g, b AND a, then continue.
#define RGBA_XFERMODE(name)                                                \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                    \
                               SkNf r, SkNf g, SkNf b, SkNf a,             \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {       \
        r = name##_kernel(r,a,dr,da);                                      \
        g = name##_kernel(g,a,dg,da);                                      \
        b = name##_kernel(b,a,db,da);                                      \
        a = name##_kernel(a,a,da,da);                                      \
        next(st, x, r,g,b,a, dr,dg,db,da);                                 \
    }                                                                      \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,       \
                               SkNf r, SkNf g, SkNf b, SkNf a,             \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {       \
        r = name##_kernel(r,a,dr,da);                                      \
        g = name##_kernel(g,a,dg,da);                                      \
        b = name##_kernel(b,a,db,da);                                      \
        a = name##_kernel(a,a,da,da);                                      \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                            \
    }                                                                      \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da)
Mike Klein9161ef02016-10-04 14:03:27 -0400117
// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
// RGB_XFERMODE(name) runs the kernel on r,g,b only; alpha is blended as
// a + da*(1-a), i.e. srcover.
#define RGB_XFERMODE(name)                                                 \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x,                    \
                               SkNf r, SkNf g, SkNf b, SkNf a,             \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {       \
        r = name##_kernel(r,a,dr,da);                                      \
        g = name##_kernel(g,a,dg,da);                                      \
        b = name##_kernel(b,a,db,da);                                      \
        a = a + (da * (1.0f-a));                                           \
        next(st, x, r,g,b,a, dr,dg,db,da);                                 \
    }                                                                      \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail,       \
                               SkNf r, SkNf g, SkNf b, SkNf a,             \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) {       \
        r = name##_kernel(r,a,dr,da);                                      \
        g = name##_kernel(g,a,dg,da);                                      \
        b = name##_kernel(b,a,db,da);                                      \
        a = a + (da * (1.0f-a));                                           \
        next(st, x,tail, r,g,b,a, dr,dg,db,da);                            \
    }                                                                      \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da)
142
// 1 - x: the complement, used throughout the blend math.
SI SkNf inv(const SkNf& x) { return 1.0f - x; }

// Linear interpolation from `from` to `to` by coverage `cov`, as one fma.
SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
    return SkNx_fma(to-from, cov, from);
}
148
// Load up to N values of T.  In the tail case (1 <= tail < N) only the first
// `tail` lanes are read from src; the remaining lanes are zero-filled.
template <bool kIsTail, typename T>
SI SkNx<N,T> load(size_t tail, const T* src) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskload for 32- and 64-bit T
    if (kIsTail) {
        T buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = src[6];  // fallthrough
            case 6: buf[5] = src[5];  // fallthrough
            case 5: buf[4] = src[4];  // fallthrough
            case 4: buf[3] = src[3];  // fallthrough
            case 3: buf[2] = src[2];  // fallthrough
            case 2: buf[1] = src[1];
        }
        buf[0] = src[0];  // tail >= 1 here, so lane 0 is always valid.
        return SkNx<N,T>::Load(buf);
    }
    return SkNx<N,T>::Load(src);
}

// Store up to N lanes of v into dst; the mirror image of load() above.
template <bool kIsTail, typename T>
SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskstore for 32- and 64-bit T
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: dst[6] = v[6];  // fallthrough
            case 6: dst[5] = v[5];  // fallthrough
            case 5: dst[4] = v[4];  // fallthrough
            case 4: dst[3] = v[3];  // fallthrough
            case 3: dst[2] = v[2];  // fallthrough
            case 2: dst[1] = v[1];
        }
        dst[0] = v[0];
        return;
    }
    v.store(dst);
}
187
// Unpack byte-interleaved 8888 pixels (r in the low byte) to floats in [0,1].
SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
    // Reinterpret the unsigned lanes as signed so SkNx_cast<float> applies.
    auto to_float = [](const SkNu& v) { return SkNx_cast<float>(SkNi::Load(&v)); };
    *r = (1/255.0f)*to_float((_8888 >> 0) & 0xff);
    *g = (1/255.0f)*to_float((_8888 >> 8) & 0xff);
    *b = (1/255.0f)*to_float((_8888 >> 16) & 0xff);
    *a = (1/255.0f)*to_float( _8888 >> 24 );
}

// Unpack 4444 pixels to floats in [0,1] using the SK_*4444_SHIFT layout.
SI void from_4444(const SkNh& _4444, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
    auto _32_bit = SkNx_cast<int>(_4444);

    *r = SkNx_cast<float>(_32_bit & (0xF << SK_R4444_SHIFT)) * (1.0f / (0xF << SK_R4444_SHIFT));
    *g = SkNx_cast<float>(_32_bit & (0xF << SK_G4444_SHIFT)) * (1.0f / (0xF << SK_G4444_SHIFT));
    *b = SkNx_cast<float>(_32_bit & (0xF << SK_B4444_SHIFT)) * (1.0f / (0xF << SK_B4444_SHIFT));
    *a = SkNx_cast<float>(_32_bit & (0xF << SK_A4444_SHIFT)) * (1.0f / (0xF << SK_A4444_SHIFT));
}

// Unpack 565 pixels to floats in [0,1] (no alpha channel in 565).
SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
    auto _32_bit = SkNx_cast<int>(_565);

    *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
    *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
    *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
}

// Pack floats in [0,1] to 565, rounding to nearest (the +0.5f).
SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
    return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
                              | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
                              | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
}
218
// Does nothing and (kCallNext = false) does not call the next stage:
// terminates the pipeline.
STAGE(just_return, false) { }

// Debug stage: print the C string stored in ctx, then continue.
STAGE(trace, true) {
    SkDebugf("%s\n", (const char*)ctx);
}

// Debug stage: dump all eight register vectors, then continue.
STAGE(registers, true) {
    auto print = [](const char* name, const SkNf& v) {
        SkDebugf("%s:", name);
        for (int i = 0; i < N; i++) {
            SkDebugf(" %g", v[i]);
        }
        SkDebugf("\n");
    };
    print(" r", r);
    print(" g", g);
    print(" b", b);
    print(" a", a);
    print("dr", dr);
    print("dg", dg);
    print("db", db);
    print("da", da);
}
242
// Clamp all channels to >= 0.
STAGE(clamp_0, true) {
    a = SkNf::Max(a, 0.0f);
    r = SkNf::Max(r, 0.0f);
    g = SkNf::Max(g, 0.0f);
    b = SkNf::Max(b, 0.0f);
}

// Clamp alpha to <= 1, then color channels to <= alpha
// (restores the premultiplied invariant r,g,b <= a).
STAGE(clamp_a, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, a);
    g = SkNf::Min(g, a);
    b = SkNf::Min(b, a);
}

// Clamp all channels to <= 1.
STAGE(clamp_1, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, 1.0f);
    g = SkNf::Min(g, 1.0f);
    b = SkNf::Min(b, 1.0f);
}
263
Mike Kleineea7c162016-11-03 10:20:35 -0400264STAGE(unpremul, true) {
265 r *= a.invert();
266 g *= a.invert();
267 b *= a.invert();
268}
269
270STAGE(premul, true) {
271 r *= a;
272 g *= a;
273 b *= a;
274}
275
// Copy the src registers into the dst registers.
STAGE(move_src_dst, true) {
    dr = r;
    dg = g;
    db = b;
    da = a;
}

// Copy the dst registers into the src registers.
STAGE(move_dst_src, true) {
    r = dr;
    g = dg;
    b = db;
    a = da;
}

// Swap the red and blue channels (e.g. RGBA <-> BGRA ordering).
STAGE(swap_rb, true) {
    SkTSwap(r, b);
}
293
294
// The default shader produces a constant color (from the SkPaint).
STAGE(constant_color, true) {
    auto color = (const SkPM4f*)ctx;
    r = color->r();
    g = color->g();
    b = color->b();
    a = color->a();
}

// s' = sc for a constant c.
STAGE(scale_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r *= c;
    g *= c;
    b *= c;
    a *= c;
}

// s' = d(1-c) + sc, for a constant c.
STAGE(lerp_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}
323
// s' = sc for 8-bit c.
STAGE(scale_u8, true) {
    // ctx holds a pointer to the start of the 8-bit coverage row.
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = r*c;
    g = g*c;
    b = b*c;
    a = a*c;
}

// s' = d(1-c) + sc for 8-bit c.
STAGE(lerp_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}

// s' = d(1-c) + sc for 565 c.
STAGE(lerp_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    SkNf cr, cg, cb;
    from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);

    r = lerp(dr, r, cr);
    g = lerp(dg, g, cg);
    b = lerp(db, b, cb);
    a = 1.0f;  // 565 coverage has no alpha channel; result is opaque.
}
357
// Load 565 pixels into the dst registers (565 is always opaque).
STAGE(load_d_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
    da = 1.0f;
}

// Load 565 pixels into the src registers.
STAGE(load_s_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
    a = 1.0f;
}

// Pack r,g,b to 565 and store (alpha is dropped).
STAGE(store_565, false) {
    auto ptr = *(uint16_t**)ctx + x;
    store<kIsTail>(tail, to_565(r,g,b), ptr);
}
374
// Load N half-float RGBA pixels (one uint64_t each) into the dst registers.
STAGE(load_d_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        // Copy the valid pixels into a zeroed scratch buffer so Load4 can
        // read a full vector safely.
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];  // fallthrough
            case 6: buf[5] = ptr[5];  // fallthrough
            case 5: buf[4] = ptr[4];  // fallthrough
            case 4: buf[3] = ptr[3];  // fallthrough
            case 3: buf[2] = ptr[2];  // fallthrough
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    dr = SkHalfToFloat_finite_ftz(rh);
    dg = SkHalfToFloat_finite_ftz(gh);
    db = SkHalfToFloat_finite_ftz(bh);
    da = SkHalfToFloat_finite_ftz(ah);
}

// Same as load_d_f16, but into the src registers.
STAGE(load_s_f16, true) {
    auto ptr = *(const uint64_t**)ctx + x;

    SkNh rh, gh, bh, ah;
    if (kIsTail) {
        uint64_t buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = ptr[6];  // fallthrough
            case 6: buf[5] = ptr[5];  // fallthrough
            case 5: buf[4] = ptr[4];  // fallthrough
            case 4: buf[3] = ptr[3];  // fallthrough
            case 3: buf[2] = ptr[2];  // fallthrough
            case 2: buf[1] = ptr[1];
        }
        buf[0] = ptr[0];
        SkNh::Load4(buf, &rh, &gh, &bh, &ah);
    } else {
        SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
    }

    r = SkHalfToFloat_finite_ftz(rh);
    g = SkHalfToFloat_finite_ftz(gh);
    b = SkHalfToFloat_finite_ftz(bh);
    a = SkHalfToFloat_finite_ftz(ah);
}

// Convert to half floats and store; the tail case stages through a scratch
// buffer, then copies only the valid pixels out.
STAGE(store_f16, false) {
    auto ptr = *(uint64_t**)ctx + x;

    uint64_t buf[8];
    SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
                                      SkFloatToHalf_finite_ftz(g),
                                      SkFloatToHalf_finite_ftz(b),
                                      SkFloatToHalf_finite_ftz(a));
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];  // fallthrough
            case 6: ptr[5] = buf[5];  // fallthrough
            case 5: ptr[4] = buf[4];  // fallthrough
            case 4: ptr[3] = buf[3];  // fallthrough
            case 3: ptr[2] = buf[2];  // fallthrough
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}
447
// Store N float RGBA (SkPM4f) pixels; the tail case stages through a
// scratch buffer, then copies only the valid pixels out.
STAGE(store_f32, false) {
    auto ptr = *(SkPM4f**)ctx + x;

    SkPM4f buf[8];
    SkNf::Store4(kIsTail ? buf : ptr, r,g,b,a);
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];  // fallthrough
            case 6: ptr[5] = buf[5];  // fallthrough
            case 5: ptr[4] = buf[4];  // fallthrough
            case 4: ptr[3] = buf[3];  // fallthrough
            case 3: ptr[2] = buf[2];  // fallthrough
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}
465
Mike Kleinaebfb452016-10-25 10:27:33 -0400466
// Load 8-bit SkPMColor-order sRGB.
STAGE(load_d_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    // Reinterpret the unsigned lanes as signed for the sRGB->linear math.
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));  // alpha stays linear
}

// Same as load_d_srgb, but into the src registers.
STAGE(load_s_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

// Encode linear floats back to 8-bit sRGB and store (alpha stays linear,
// rounded to nearest via the +0.5f).
STAGE(store_srgb, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT
                         | sk_linear_to_srgb(g) << SK_G32_SHIFT
                         | sk_linear_to_srgb(b) << SK_B32_SHIFT
                         | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}
497
// Load 8888 pixels into the src registers (no sRGB decode).
STAGE(load_s_8888, true) {
    auto ptr = *(const uint32_t**)ctx + x;
    from_8888(load<kIsTail>(tail, ptr), &r, &g, &b, &a);
}

// Pack floats in [0,1] to 8888 (r in the low byte) and store,
// rounding to nearest via the +0.5f.
STAGE(store_8888, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 0
                         | SkNx_cast<int>(255.0f * g + 0.5f) << 8
                         | SkNx_cast<int>(255.0f * b + 0.5f) << 16
                         | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
}
510
// Porter-Duff (and related) modes whose kernel applies uniformly to all
// four channels.  s/sa are source color/alpha; d/da are dest color/alpha.
RGBA_XFERMODE(clear)    { return 0.0f; }
//RGBA_XFERMODE(src)      { return s; }  // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst)      { return d; }

RGBA_XFERMODE(srcatop)  { return s*da + d*inv(sa); }
RGBA_XFERMODE(srcin)    { return s * da; }
RGBA_XFERMODE(srcout)   { return s * inv(da); }
RGBA_XFERMODE(srcover)  { return SkNx_fma(d, inv(sa), s); }
// Each dst* mode is its src* counterpart with src and dst swapped.
RGBA_XFERMODE(dstatop)  { return srcatop_kernel(d,da,s,sa); }
RGBA_XFERMODE(dstin)    { return srcin_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstout)   { return srcout_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstover)  { return srcover_kernel(d,da,s,sa); }

RGBA_XFERMODE(modulate) { return s*d; }
RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
RGBA_XFERMODE(plus_)    { return s + d; }
RGBA_XFERMODE(screen)   { return s + d - s*d; }
RGBA_XFERMODE(xor_)     { return s*inv(da) + d*inv(sa); }
529
// Separable blend modes: the kernel drives r,g,b; alpha uses srcover
// (see RGB_XFERMODE).  The == / thenElse branches guard division by zero.
RGB_XFERMODE(colorburn) {
    return (d == da ).thenElse(d + s*inv(da),
           (s == 0.0f).thenElse(s + d*inv(sa),
                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(colordodge) {
    return (d == 0.0f).thenElse(d + s*inv(da),
           (s == sa  ).thenElse(s + d*inv(sa),
                                sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(darken)     { return s + d - SkNf::Max(s*da, d*sa); }
RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
RGB_XFERMODE(exclusion)  { return s + d - 2.0f*s*d; }
RGB_XFERMODE(hardlight) {
    return s*inv(da) + d*inv(sa)
         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
}
RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
// overlay is hardlight with src and dst swapped.
RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
RGB_XFERMODE(softlight) {
    SkNf m  = (da > 0.0f).thenElse(d / da, 0.0f),
         s2 = 2.0f*s,
         m4 = 4.0f*m;

    // The logic forks three ways:
    //    1. dark src?
    //    2. light src, dark dst?
    //    3. light src, light dst?
    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)),     // Used in case 1.
         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m,  // Used in case 2.
         liteDst = m.rsqrt().invert() - m,            // Used in case 3.
         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst);  // 2 or 3?
    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc);  // 1 or (2 or 3)?
}
564
// Replace alpha with the weighted luminance of r,g,b (SK_LUM_COEFF_*
// weights) and zero out the color channels.
STAGE(luminance_to_alpha, true) {
    a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
    r = g = b = 0;
}

// Apply a column-major 2x3 affine matrix to (r,g), used as (x,y) coords.
STAGE(matrix_2x3, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[2], m[4])),
         G = fma(r,m[1], fma(g,m[3], m[5]));
    r = R;
    g = G;
}
579
// Apply a column-major 3x4 matrix (3x3 plus translate column) to r,g,b.
STAGE(matrix_3x4, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))),
         G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))),
         B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11])));
    r = R;
    g = G;
    b = B;
}

// Apply a column-major 4x5 color matrix (4x4 plus translate column) to r,g,b,a.
STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
    r = R;
    g = G;
    b = B;
    a = A;
}
Mike Kleinaebfb452016-10-25 10:27:33 -0400605
Mike Kleinc01e7df2016-11-17 16:27:10 -0500606STAGE(matrix_perspective, true) {
607 // N.B. unlike the matrix_NxM stages, this takes a row-major matrix.
608 auto m = (const float*)ctx;
609
610 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
611 auto R = fma(r,m[0], fma(g,m[1], m[2])),
612 G = fma(r,m[3], fma(g,m[4], m[5])),
613 Z = fma(r,m[6], fma(g,m[7], m[8]));
614 r = R * Z.invert();
615 g = G * Z.invert();
616}
617
618
// Apply a parametric transfer function, lane by lane: linear segment
// (E*s + F) below the breakpoint D, power curve above it.
SI SkNf parametric(const SkNf& v, const SkColorSpaceTransferFn& p) {
    float result[N];  // Unconstrained powf() doesn't vectorize well...
    for (int i = 0; i < N; i++) {
        float s = v[i];
        result[i] = (s <= p.fD) ? p.fE * s + p.fF
                                : powf(s * p.fA + p.fB, p.fG) + p.fC;
    }
    return SkNf::Load(result);
}

STAGE(parametric_r, true) {
    r = parametric(r, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_g, true) {
    g = parametric(g, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_b, true) {
    b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
}

// Apply a sampled (lookup-table) transfer function, lane by lane.
SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {
    float result[N];
    for (int i = 0; i < N; i++) {
        result[i] = interp_lut(v[i], table.fData, table.fSize);
    }
    return SkNf::Load(result);
}

STAGE(table_r, true) {
    r = table(r, *(const SkTableTransferFn*)ctx);
}
STAGE(table_g, true) {
    g = table(g, *(const SkTableTransferFn*)ctx);
}
STAGE(table_b, true) {
    b = table(b, *(const SkTableTransferFn*)ctx);
}
656
// Run r,g,b through a 3D color lookup table, one lane at a time
// (interp3D is scalar).
STAGE(color_lookup_table, true) {
    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
    float rgb[3];
    float result[3][N];
    for (int i = 0; i < N; ++i) {
        rgb[0] = r[i];
        rgb[1] = g[i];
        rgb[2] = b[i];
        colorLUT->interp3D(rgb, rgb);
        result[0][i] = rgb[0];
        result[1][i] = rgb[1];
        result[2][i] = rgb[2];
    }
    r = SkNf::Load(result[0]);
    g = SkNf::Load(result[1]);
    b = SkNf::Load(result[2]);
}

// Convert CIELAB (packed into r,g,b as L/100, (a+128)/255, (b+128)/255)
// to XYZ, then adapt to the D50 white point.
STAGE(lab_to_xyz, true) {
    const auto lab_l = r * 100.0f;
    const auto lab_a = g * 255.0f - 128.0f;
    const auto lab_b = b * 255.0f - 128.0f;
    auto Y = (lab_l + 16.0f) * (1/116.0f);
    auto X = lab_a * (1/500.0f) + Y;
    auto Z = Y - (lab_b * (1/200.0f));

    // Undo the cube-root encoding; the linear branch handles small values.
    const auto X3 = X*X*X;
    X = (X3 > 0.008856f).thenElse(X3, (X - (16/116.0f)) * (1/7.787f));
    const auto Y3 = Y*Y*Y;
    Y = (Y3 > 0.008856f).thenElse(Y3, (Y - (16/116.0f)) * (1/7.787f));
    const auto Z3 = Z*Z*Z;
    Z = (Z3 > 0.008856f).thenElse(Z3, (Z - (16/116.0f)) * (1/7.787f));

    // adjust to D50 illuminant
    X *= 0.96422f;
    Y *= 1.00000f;
    Z *= 0.82521f;

    r = X;
    g = Y;
    b = Z;
}
699
// Debug check: every lane must land in [0, limit) after tiling.
SI SkNf assert_in_tile(const SkNf& v, float limit) {
    for (int i = 0; i < N; i++) {
        SkASSERT(0 <= v[i] && v[i] < limit);
    }
    return v;
}

// Clamp tiling: pin coordinates into [0, limit - 0.5].
SI SkNf clamp(const SkNf& v, float limit) {
    SkNf result = SkNf::Max(0, SkNf::Min(v, limit - 0.5f));
    return assert_in_tile(result, limit);
}

// Repeat tiling: wrap coordinates into [0, limit).
SI SkNf repeat(const SkNf& v, float limit) {
    SkNf result = v - (v/limit).floor()*limit;
    // For small negative v, (v/limit).floor()*limit can dominate v in the subtraction,
    // which leaves result == limit.  We want result < limit, so clamp it one ULP.
    result = SkNf::Min(result, nextafterf(limit, 0));
    return assert_in_tile(result, limit);
}

// Mirror tiling: reflect coordinates into [0, l).
SI SkNf mirror(const SkNf& v, float l/*imit*/) {
    SkNf result = ((v - l) - ((v - l) / (2*l)).floor()*(2*l) - l).abs();
    // Same deal as repeat.
    result = SkNf::Min(result, nextafterf(l, 0));
    return assert_in_tile(result, l);
}

// ctx points at the tile bound stored as an int; it's converted to float
// when passed to the helpers above.  r is x, g is y.
STAGE(clamp_x,  true) { r = clamp (r, *(const int*)ctx); }
STAGE(clamp_y,  true) { g = clamp (g, *(const int*)ctx); }
STAGE(repeat_x, true) { r = repeat(r, *(const int*)ctx); }
STAGE(repeat_y, true) { g = repeat(g, *(const int*)ctx); }
STAGE(mirror_x, true) { r = mirror(r, *(const int*)ctx); }
STAGE(mirror_y, true) { g = mirror(g, *(const int*)ctx); }
Mike Klein06a65e22016-11-17 12:39:09 -0500733
// Bilinear sampling corner setup.  top_left runs first: it snapshots the
// incoming (x,y) coords into the context, then each corner stage derives its
// own sample coords from that snapshot and leaves the corner's bilinear
// weight in b (consumed by the accum_* stages below).
STAGE(top_left, true) {
    auto sc = (SkImageShaderContext*)ctx;

    r.store(sc->x);  // Save unmodified coords for the other three corners.
    g.store(sc->y);

    r -= 0.5f;
    g -= 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * (1.0f - fy);
};

STAGE(top_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) - 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * (1.0f - fy);
};

STAGE(bottom_left, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) - 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * fy;
};

STAGE(bottom_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * fy;
};
780
// Truncate (x,y) sample coords to integers, compute per-lane pixel offsets
// (y*stride + x), and return the image's base pixel pointer via *ptr.
template <typename T>
SI SkNi offset_and_ptr(T** ptr, const void* ctx, const SkNf& x, const SkNf& y) {
    auto sc = (const SkImageShaderContext*)ctx;

    SkNi ix = SkNx_cast<int>(x),
         iy = SkNx_cast<int>(y);
    SkNi offset = iy*sc->stride + ix;

    *ptr = (const T*)sc->pixels;
    return offset;
}

// Gather up to N values from src at the given offsets.  In the tail case
// only the first `tail` lanes are gathered; the rest are zeroed.
template <typename T>
SI void gather(T (&dst)[N], const T* src, const SkNi& offset, size_t tail) {
    size_t n = tail ? tail : N;
    for (size_t i = 0; i < n; i++) { dst[i] = src[offset[i]]; }
    for (size_t i = n; i < N; i++) { dst[i] = 0; }
}
799
// Unimplemented sample-accumulate stages for alpha-only and indexed formats.
STAGE(accum_a8, true) {}  // TODO

STAGE(accum_i8, true) {}  // TODO
STAGE(accum_i8_srgb, true) {}  // TODO

// Accumulate one bilinear sample from an 8-bit grayscale image: b carries
// the corner weight; the weighted gray value is added to dr,dg,db and the
// weight itself to da.
STAGE(accum_g8, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    gather(px, p, offset, tail);

    SkNf gray = SkNx_cast<float>(SkNb::Load(px)) * (1/255.0f);

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}
// Same as accum_g8, but sRGB-decodes the gray value first.
STAGE(accum_g8_srgb, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    gather(px, p, offset, tail);

    SkNf gray = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(px)));

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}
835
// Accumulate one bilinear sample from a 565 image (b = corner weight).
STAGE(accum_565, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale;  // 565 is opaque: alpha accumulates the plain weight.
}
// Same, but sRGB-decodes the color channels first.
STAGE(accum_565_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale;
}

// Accumulate one bilinear sample from a 4444 image (b = corner weight).
STAGE(accum_4444, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale * A;
}
// Same, but sRGB-decodes the color channels (alpha stays linear).
STAGE(accum_4444_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale * A;
}
Mike Kleincb5338c2016-11-22 14:58:45 -0500901
902STAGE(accum_8888, true) {
903 const uint32_t* p;
904 SkNi offset = offset_and_ptr(&p, ctx, r, g);
905
Mike Kleind5de0132016-11-28 09:33:02 -0500906 uint32_t px[N];
907 gather(px, p, offset, tail);
908
909 SkNf R,G,B,A;
910 from_8888(SkNu::Load(px), &R, &G, &B, &A);
Mike Kleincb5338c2016-11-22 14:58:45 -0500911
912 SkNf scale = b;
Mike Kleind5de0132016-11-28 09:33:02 -0500913 dr += scale * R;
914 dg += scale * G;
915 db += scale * B;
916 da += scale * A;
Mike Kleincb5338c2016-11-22 14:58:45 -0500917}
918STAGE(accum_8888_srgb, true) {
919 const uint32_t* p;
920 SkNi offset = offset_and_ptr(&p, ctx, r, g);
921
Mike Kleind5de0132016-11-28 09:33:02 -0500922 uint32_t px[N];
923 gather(px, p, offset, tail);
924
925 SkNf R,G,B,A;
926 from_8888(SkNu::Load(px), &R, &G, &B, &A);
Mike Kleincb5338c2016-11-22 14:58:45 -0500927
928 SkNf scale = b;
Mike Kleind5de0132016-11-28 09:33:02 -0500929 dr += scale * sk_linear_from_srgb_math(R);
930 dg += scale * sk_linear_from_srgb_math(G);
931 db += scale * sk_linear_from_srgb_math(B);
932 da += scale * A;
Mike Kleincb5338c2016-11-22 14:58:45 -0500933}
Mike Kleincb2c12b2016-11-22 13:22:48 -0500934
935STAGE(accum_f16, true) {
936 const uint64_t* p;
937 SkNi offset = offset_and_ptr(&p, ctx, r, g);
938
Mike Kleind5de0132016-11-28 09:33:02 -0500939 // f16 -> f32 conversion works best with tightly packed f16s,
940 // so we gather each component rather than using gather().
Mike Kleincb2c12b2016-11-22 13:22:48 -0500941 uint16_t R[N], G[N], B[N], A[N];
Mike Kleind5de0132016-11-28 09:33:02 -0500942 size_t n = tail ? tail : N;
943 for (size_t i = 0; i < n; i++) {
Mike Kleincb2c12b2016-11-22 13:22:48 -0500944 uint64_t rgba = p[offset[i]];
945 R[i] = rgba >> 0;
946 G[i] = rgba >> 16;
947 B[i] = rgba >> 32;
948 A[i] = rgba >> 48;
949 }
Mike Kleind5de0132016-11-28 09:33:02 -0500950 for (size_t i = n; i < N; i++) {
951 R[i] = G[i] = B[i] = A[i] = 0;
952 }
Mike Kleincb2c12b2016-11-22 13:22:48 -0500953 SkNf scale = b;
954 dr += scale * SkHalfToFloat_finite_ftz(SkNh::Load(R));
955 dg += scale * SkHalfToFloat_finite_ftz(SkNh::Load(G));
956 db += scale * SkHalfToFloat_finite_ftz(SkNh::Load(B));
957 da += scale * SkHalfToFloat_finite_ftz(SkNh::Load(A));
958}
959
Mike Klein06a65e22016-11-17 12:39:09 -0500960
// Map a StockStage enum value to the matching stage function pointer.
// Fn is either Body or Tail; each stage name is defined for both signatures
// by the STAGE macros, so the same table works for either instantiation.
template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
    switch (st) {
    // Expand one case per stage from the X-macro list of all stage names.
    #define M(stage) case SkRasterPipeline::stage: return stage;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
    }
    // Unreachable if st is a valid StockStage; fall back to a no-op stage.
    SkASSERT(false);
    return just_return;
}
Mike Klein9161ef02016-10-04 14:03:27 -0400971
namespace SK_OPTS_NS {

    // Fast paths for a constant_color -> store_* pipeline: the whole row is one
    // solid value, so it reduces to a memset.  Each functor matches the
    // compiled-pipeline call shape (x, y, n); y is unused for a fill.
    // dst is a pointer-to-pointer because the caller updates the row base
    // between invocations.
    struct Memset16 {
        uint16_t** dst;
        uint16_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset16(*dst + x, val, n); }
    };

    struct Memset32 {
        uint32_t** dst;
        uint32_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset32(*dst + x, val, n); }
    };

    struct Memset64 {
        uint64_t** dst;
        uint64_t val;
        void operator()(size_t x, size_t, size_t n) { sk_memset64(*dst + x, val, n); }
    };

    // Compile an array of pipeline stages into a callable (x, y, n) that runs
    // the pipeline over n pixels of a row starting at x.
    SI std::function<void(size_t, size_t, size_t)>
    compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
        // Special-case constant_color followed by a store: precompute the
        // packed pixel once and return a plain memset.
        if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
            SkPM4f src = *(const SkPM4f*)stages[0].ctx;
            void* dst = stages[1].ctx;
            switch (stages[1].stage) {
                case SkRasterPipeline::store_565:
                    return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f,
                                                                 src.g() * SK_G16_MASK + 0.5f,
                                                                 src.b() * SK_B16_MASK + 0.5f)};
                case SkRasterPipeline::store_srgb:
                    return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())};

                case SkRasterPipeline::store_f16:
                    return Memset64{(uint64_t**)dst, src.toF16()};

                default: break;
            }
        }

        // General case: wire the stages into two parallel linked programs,
        // one full-vector (Body) and one partial-vector (Tail), where each
        // entry holds the NEXT stage's function plus its OWN context.
        struct Compiled {
            Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
                if (nstages == 0) {
                    return;   // Leave fBodyStart/fTailStart as just_return.
                }

                fBodyStart = enum_to_Fn<Body>(stages[0].stage);
                fTailStart = enum_to_Fn<Tail>(stages[0].stage);
                // Stage i stores stage i+1's function: each stage tail-calls
                // its successor, so `next` is offset by one from `ctx`.
                for (int i = 0; i < nstages-1; i++) {
                    fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
                    fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
                    fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
                }
                // Last stage chains to just_return, ending the pipeline.
                fBody[nstages-1].next = just_return;
                fTail[nstages-1].next = just_return;
                fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
            }

            void operator()(size_t x, size_t y, size_t n) {
                // Per-lane x offsets; sized for the largest N (8 under AVX2).
                float dx[] = { 0,1,2,3,4,5,6,7 };
                // Pixel-center coordinates and the constant registers the
                // stage calling convention expects in r,g,b,a.
                SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
                     Y = SkNf(y) + 0.5f,
                     _0 = SkNf(0),
                     _1 = SkNf(1);

                // Full N-pixel batches through the Body program...
                while (n >= N) {
                    fBodyStart(fBody, x, X,Y,_1,_0, _0,_0,_0,_0);
                    X += (float)N;
                    x += N;
                    n -= N;
                }
                // ...and at most one partial batch through the Tail program.
                if (n) {
                    fTailStart(fTail, x,n, X,Y,_1,_0, _0,_0,_0,_0);
                }
            }

            Body fBodyStart = just_return;
            Tail fTailStart = just_return;

            BodyStage fBody[SkRasterPipeline::kMaxStages];
            TailStage fTail[SkRasterPipeline::kMaxStages];

        } fn { stages, nstages };
        // Returned by value; the std::function copies the whole program.
        return fn;
    }

} // namespace SK_OPTS_NS
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001059
Mike Klein04adfda2016-10-12 09:52:55 -04001060#undef SI
1061#undef STAGE
1062#undef RGBA_XFERMODE
1063#undef RGB_XFERMODE
Mike Klein9161ef02016-10-04 14:03:27 -04001064
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001065#endif//SkRasterPipeline_opts_DEFINED