blob: 3b4cc2151bf4442a716868174f3c084c087428c4 [file] [log] [blame]
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkRasterPipeline_opts_DEFINED
9#define SkRasterPipeline_opts_DEFINED
10
Mike Klein1f49f262016-10-31 19:49:27 -040011#include "SkColorPriv.h"
raftias25636012016-11-11 15:27:39 -080012#include "SkColorLookUpTable.h"
Matt Sarettdb4d4062016-11-16 16:07:15 -050013#include "SkColorSpaceXform_A2B.h"
14#include "SkColorSpaceXformPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040015#include "SkHalf.h"
Mike Klein46e66a22016-11-21 16:19:34 -050016#include "SkImageShaderContext.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040017#include "SkPM4f.h"
mtklein125b2aa2016-11-04 13:41:34 -070018#include "SkPM4fPriv.h"
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040019#include "SkRasterPipeline.h"
20#include "SkSRGB.h"
mtklein125b2aa2016-11-04 13:41:34 -070021#include "SkUtils.h"
Mike Klein2878e762016-10-19 21:05:17 -040022#include <utility>
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040023
namespace {

// Pipeline register width: 8 float lanes when AVX2 is available, else 4.
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    static constexpr int N = 8;
#else
    static constexpr int N = 4;
#endif

    // N-wide SIMD vector aliases used by every stage in this file.
    using SkNf = SkNx<N, float>;     // color channels, coordinates
    using SkNi = SkNx<N, int32_t>;
    using SkNu = SkNx<N, uint32_t>;
    using SkNh = SkNx<N, uint16_t>;  // half floats, 565, 4444
    using SkNb = SkNx<N, uint8_t>;

    struct BodyStage;
    struct TailStage;

    // A stage is a function pointer plus a context pointer, stored contiguously.
    // Body stages process a full N pixels at offset x; Tail stages additionally
    // receive `tail`, the number of valid pixels (0 < tail < N) in the final chunk.
    // The eight SkNf arguments are the src (r,g,b,a) and dst (dr,dg,db,da) registers.
    using Body = void(SK_VECTORCALL *)(BodyStage*, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
                                       SkNf,SkNf,SkNf,SkNf);
    struct BodyStage { Body next; void* ctx; };
    struct TailStage { Tail next; void* ctx; };

}  // namespace
Mike Klein2878e762016-10-19 21:05:17 -040049
// Shorthand used by nearly every function in this file.
#define SI static inline

// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
SI void SK_VECTORCALL next(BodyStage* st, size_t x,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}
SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
                           SkNf r, SkNf g, SkNf b, SkNf a,
                           SkNf dr, SkNf dg, SkNf db, SkNf da) {
    st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
64
Mike Klein2878e762016-10-19 21:05:17 -040065
// Declares a pipeline stage `name` in both Body and Tail flavors.
// The per-pixel logic is written once in the templated kernel whose body
// follows the macro invocation; kIsTail tells it whether `tail` is meaningful
// (Body stages pass tail == 0).  When kCallNext is true the stage forwards
// the registers to the next stage; terminal stages pass false.
#define STAGE(name, kCallNext) \
    template <bool kIsTail> \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da); \
        if (kCallNext) { \
            next(st, x, r,g,b,a, dr,dg,db,da); \
        } \
    } \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da); \
        if (kCallNext) { \
            next(st, x,tail, r,g,b,a, dr,dg,db,da); \
        } \
    } \
    template <bool kIsTail> \
    static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
                                               SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
                                               SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
Mike Kleinbaaf8ad2016-09-29 09:04:15 -040092
// Many xfermodes apply the same logic to each channel.
// Declares a stage whose kernel is a per-channel Porter-Duff style function of
// (src, src-alpha, dst, dst-alpha); it is applied to r, g, b, and to a itself.
#define RGBA_XFERMODE(name) \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        r = name##_kernel(r,a,dr,da); \
        g = name##_kernel(g,a,dg,da); \
        b = name##_kernel(b,a,db,da); \
        a = name##_kernel(a,a,da,da); \
        next(st, x, r,g,b,a, dr,dg,db,da); \
    } \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        r = name##_kernel(r,a,dr,da); \
        g = name##_kernel(g,a,dg,da); \
        b = name##_kernel(b,a,db,da); \
        a = name##_kernel(a,a,da,da); \
        next(st, x,tail, r,g,b,a, dr,dg,db,da); \
    } \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da)

// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
#define RGB_XFERMODE(name) \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da); \
    SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        r = name##_kernel(r,a,dr,da); \
        g = name##_kernel(g,a,dg,da); \
        b = name##_kernel(b,a,db,da); \
        a = a + (da * (1.0f-a)); \
        next(st, x, r,g,b,a, dr,dg,db,da); \
    } \
    SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
                               SkNf r, SkNf g, SkNf b, SkNf a, \
                               SkNf dr, SkNf dg, SkNf db, SkNf da) { \
        r = name##_kernel(r,a,dr,da); \
        g = name##_kernel(g,a,dg,da); \
        b = name##_kernel(b,a,db,da); \
        a = a + (da * (1.0f-a)); \
        next(st, x,tail, r,g,b,a, dr,dg,db,da); \
    } \
    static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
                                               const SkNf& d, const SkNf& da)
// 1 - x, the usual Porter-Duff complement.
SI SkNf inv(const SkNf& x) { return 1.0f - x; }

// Linear interpolation from `from` to `to` by coverage `cov`, using fma.
SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
    return SkNx_fma(to-from, cov, from);
}

// Load N values of T from src.  In the tail case only `tail` values are valid,
// so they are copied into a zeroed stack buffer first; lanes past the tail
// read as 0.  The switch cases intentionally fall through, and element 0 is
// always valid, so it is copied unconditionally after the switch.
template <bool kIsTail, typename T>
SI SkNx<N,T> load(size_t tail, const T* src) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskload for 32- and 64-bit T
    if (kIsTail) {
        T buf[8] = {0};
        switch (tail & (N-1)) {
            case 7: buf[6] = src[6];
            case 6: buf[5] = src[5];
            case 5: buf[4] = src[4];
            case 4: buf[3] = src[3];
            case 3: buf[2] = src[2];
            case 2: buf[1] = src[1];
        }
        buf[0] = src[0];
        return SkNx<N,T>::Load(buf);
    }
    return SkNx<N,T>::Load(src);
}
168
// Store N values of T to dst.  In the tail case only the first `tail` elements
// are written, scalar by scalar; the switch intentionally falls through, and
// element 0 is always valid so it is written unconditionally after the switch.
template <bool kIsTail, typename T>
SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
    SkASSERT(kIsTail == (tail > 0));
    // TODO: maskstore for 32- and 64-bit T
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: dst[6] = v[6];
            case 6: dst[5] = v[5];
            case 5: dst[4] = v[4];
            case 4: dst[3] = v[3];
            case 3: dst[2] = v[2];
            case 2: dst[1] = v[1];
        }
        dst[0] = v[0];
        return;
    }
    v.store(dst);
}
187
// Unpack 8888 (8 bits per channel, R in the low byte) into [0,1] floats.
SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
    // Reinterpret the unsigned lanes as signed for the int->float cast.
    auto to_float = [](const SkNu& v) { return SkNx_cast<float>(SkNi::Load(&v)); };
    *r = (1/255.0f)*to_float((_8888 >> 0) & 0xff);
    *g = (1/255.0f)*to_float((_8888 >> 8) & 0xff);
    *b = (1/255.0f)*to_float((_8888 >> 16) & 0xff);
    *a = (1/255.0f)*to_float( _8888 >> 24 );
}

// Unpack 4444 (4 bits per channel, shifts from SkColorPriv.h) into [0,1] floats.
SI void from_4444(const SkNh& _4444, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
    auto _32_bit = SkNx_cast<int>(_4444);

    *r = SkNx_cast<float>(_32_bit & (0xF << SK_R4444_SHIFT)) * (1.0f / (0xF << SK_R4444_SHIFT));
    *g = SkNx_cast<float>(_32_bit & (0xF << SK_G4444_SHIFT)) * (1.0f / (0xF << SK_G4444_SHIFT));
    *b = SkNx_cast<float>(_32_bit & (0xF << SK_B4444_SHIFT)) * (1.0f / (0xF << SK_B4444_SHIFT));
    *a = SkNx_cast<float>(_32_bit & (0xF << SK_A4444_SHIFT)) * (1.0f / (0xF << SK_A4444_SHIFT));
}

// Unpack 565 (5/6/5 bits, masks from SkColorPriv.h) into [0,1] floats.
SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
    auto _32_bit = SkNx_cast<int>(_565);

    *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
    *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
    *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
}

// Pack [0,1] floats into 565, rounding to nearest (+0.5f before truncation).
SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
    return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
                              | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
                              | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
}
218
// Terminal stage: does nothing and does not call the next stage.
STAGE(just_return, false) { }

// Debug stage: prints the C string stored in ctx.
STAGE(trace, true) {
    SkDebugf("%s\n", (const char*)ctx);
}

// Debug stage: dumps all eight registers, one lane at a time.
STAGE(registers, true) {
    auto print = [](const char* name, const SkNf& v) {
        SkDebugf("%s:", name);
        for (int i = 0; i < N; i++) {
            SkDebugf(" %g", v[i]);
        }
        SkDebugf("\n");
    };
    print(" r", r);
    print(" g", g);
    print(" b", b);
    print(" a", a);
    print("dr", dr);
    print("dg", dg);
    print("db", db);
    print("da", da);
}
242
// Clamp all src channels to >= 0.
STAGE(clamp_0, true) {
    a = SkNf::Max(a, 0.0f);
    r = SkNf::Max(r, 0.0f);
    g = SkNf::Max(g, 0.0f);
    b = SkNf::Max(b, 0.0f);
}

// Clamp alpha to <= 1, then clamp each color channel to <= alpha
// (restores the premultiplied invariant r,g,b <= a).
STAGE(clamp_a, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, a);
    g = SkNf::Min(g, a);
    b = SkNf::Min(b, a);
}

// Clamp all src channels to <= 1.
STAGE(clamp_1, true) {
    a = SkNf::Min(a, 1.0f);
    r = SkNf::Min(r, 1.0f);
    g = SkNf::Min(g, 1.0f);
    b = SkNf::Min(b, 1.0f);
}

// Divide color channels by alpha, mapping a == 0 to 0 to avoid dividing by zero.
STAGE(unpremul, true) {
    auto scale = (a == 0.0f).thenElse(0.0f, 1.0f/a);
    r *= scale;
    g *= scale;
    b *= scale;
}

// Multiply color channels by alpha.
STAGE(premul, true) {
    r *= a;
    g *= a;
    b *= a;
}

// Copy the src registers into the dst registers.
STAGE(move_src_dst, true) {
    dr = r;
    dg = g;
    db = b;
    da = a;
}

// Copy the dst registers into the src registers.
STAGE(move_dst_src, true) {
    r = dr;
    g = dg;
    b = db;
    a = da;
}

// Exchange the red and blue src channels.
STAGE(swap_rb, true) {
    SkTSwap(r, b);
}
294
295
// The default shader produces a constant color (from the SkPaint).
STAGE(constant_color, true) {
    auto color = (const SkPM4f*)ctx;
    r = color->r();
    g = color->g();
    b = color->b();
    a = color->a();
}

// s' = sc for a constant c.
STAGE(scale_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r *= c;
    g *= c;
    b *= c;
    a *= c;
}

// s' = d(1-c) + sc, for a constant c.
STAGE(lerp_constant_float, true) {
    SkNf c = *(const float*)ctx;

    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}
324
// s' = sc for 8-bit c.
STAGE(scale_u8, true) {
    // ctx holds a pointer to the coverage row; advance to pixel x.
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = r*c;
    g = g*c;
    b = b*c;
    a = a*c;
}

// s' = d(1-c) + sc for 8-bit c.
STAGE(lerp_u8, true) {
    auto ptr = *(const uint8_t**)ctx + x;

    SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
    r = lerp(dr, r, c);
    g = lerp(dg, g, c);
    b = lerp(db, b, c);
    a = lerp(da, a, c);
}

// s' = d(1-c) + sc for 565 c (per-channel coverage; alpha forced to 1).
STAGE(lerp_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    SkNf cr, cg, cb;
    from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);

    r = lerp(dr, r, cr);
    g = lerp(dg, g, cg);
    b = lerp(db, b, cb);
    a = 1.0f;
}

// Load 565 pixels into the dst registers; 565 has no alpha, so da = 1.
STAGE(load_d_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
    da = 1.0f;
}

// Load 565 pixels into the src registers; a = 1.
STAGE(load_s_565, true) {
    auto ptr = *(const uint16_t**)ctx + x;
    from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
    a = 1.0f;
}

// Pack src to 565 and store; alpha is dropped.
STAGE(store_565, false) {
    auto ptr = *(uint16_t**)ctx + x;
    store<kIsTail>(tail, to_565(r,g,b), ptr);
}
375
376STAGE(load_d_f16, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400377 auto ptr = *(const uint64_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400378
379 SkNh rh, gh, bh, ah;
380 if (kIsTail) {
381 uint64_t buf[8] = {0};
382 switch (tail & (N-1)) {
383 case 7: buf[6] = ptr[6];
384 case 6: buf[5] = ptr[5];
385 case 5: buf[4] = ptr[4];
386 case 4: buf[3] = ptr[3];
387 case 3: buf[2] = ptr[2];
388 case 2: buf[1] = ptr[1];
389 }
390 buf[0] = ptr[0];
391 SkNh::Load4(buf, &rh, &gh, &bh, &ah);
392 } else {
393 SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
394 }
395
396 dr = SkHalfToFloat_finite_ftz(rh);
397 dg = SkHalfToFloat_finite_ftz(gh);
398 db = SkHalfToFloat_finite_ftz(bh);
399 da = SkHalfToFloat_finite_ftz(ah);
400}
401
402STAGE(load_s_f16, true) {
Mike Kleinbd3fe472016-10-25 15:43:46 -0400403 auto ptr = *(const uint64_t**)ctx + x;
Mike Kleinaebfb452016-10-25 10:27:33 -0400404
405 SkNh rh, gh, bh, ah;
406 if (kIsTail) {
407 uint64_t buf[8] = {0};
408 switch (tail & (N-1)) {
409 case 7: buf[6] = ptr[6];
410 case 6: buf[5] = ptr[5];
411 case 5: buf[4] = ptr[4];
412 case 4: buf[3] = ptr[3];
413 case 3: buf[2] = ptr[2];
414 case 2: buf[1] = ptr[1];
415 }
416 buf[0] = ptr[0];
417 SkNh::Load4(buf, &rh, &gh, &bh, &ah);
418 } else {
419 SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
420 }
421
422 r = SkHalfToFloat_finite_ftz(rh);
423 g = SkHalfToFloat_finite_ftz(gh);
424 b = SkHalfToFloat_finite_ftz(bh);
425 a = SkHalfToFloat_finite_ftz(ah);
426}
427
// Convert src to f16 and store interleaved RGBA.  In the tail case we write
// into a stack buffer first, then copy only the valid pixels out; the switch
// intentionally falls through, and pixel 0 is always written.
STAGE(store_f16, false) {
    auto ptr = *(uint64_t**)ctx + x;

    uint64_t buf[8];
    SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
                                      SkFloatToHalf_finite_ftz(g),
                                      SkFloatToHalf_finite_ftz(b),
                                      SkFloatToHalf_finite_ftz(a));
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}

// Store src as interleaved f32 (SkPM4f), with the same tail strategy as store_f16.
STAGE(store_f32, false) {
    auto ptr = *(SkPM4f**)ctx + x;

    SkPM4f buf[8];
    SkNf::Store4(kIsTail ? buf : ptr, r,g,b,a);
    if (kIsTail) {
        switch (tail & (N-1)) {
            case 7: ptr[6] = buf[6];
            case 6: ptr[5] = buf[5];
            case 5: ptr[4] = buf[4];
            case 4: ptr[3] = buf[3];
            case 3: ptr[2] = buf[2];
            case 2: ptr[1] = buf[1];
        }
        ptr[0] = buf[0];
    }
}
466
Mike Kleinaebfb452016-10-25 10:27:33 -0400467
// Load 8-bit SkPMColor-order sRGB.
// Color channels go through the sRGB->linear table; alpha is linear (just /255).
STAGE(load_d_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    // Reinterpret unsigned lanes as signed ints for the lookup/cast.
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

// Same as load_d_srgb, but into the src registers.
STAGE(load_s_srgb, true) {
    auto ptr = *(const uint32_t**)ctx + x;

    auto px = load<kIsTail>(tail, ptr);
    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
}

// Encode linear src to sRGB bytes and store; alpha is rounded linearly.
STAGE(store_srgb, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT
                         | sk_linear_to_srgb(g) << SK_G32_SHIFT
                         | sk_linear_to_srgb(b) << SK_B32_SHIFT
                         | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}

// Load 8888 pixels (no transfer function) into the src registers.
STAGE(load_s_8888, true) {
    auto ptr = *(const uint32_t**)ctx + x;
    from_8888(load<kIsTail>(tail, ptr), &r, &g, &b, &a);
}

// Store src as 8888 with round-to-nearest, R in the low byte.
STAGE(store_8888, false) {
    auto ptr = *(uint32_t**)ctx + x;
    store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 0
                         | SkNx_cast<int>(255.0f * g + 0.5f) << 8
                         | SkNx_cast<int>(255.0f * b + 0.5f) << 16
                         | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
}
511
// Porter-Duff modes: each kernel computes one premultiplied channel from
// (src, src-alpha, dst, dst-alpha).
RGBA_XFERMODE(clear) { return 0.0f; }
//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst) { return d; }

RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); }
RGBA_XFERMODE(srcin) { return s * da; }
RGBA_XFERMODE(srcout) { return s * inv(da); }
RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); }
// The dst* modes are the src* modes with the roles of src and dst swapped.
RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); }
RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); }
RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); }

RGBA_XFERMODE(modulate) { return s*d; }
RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
RGBA_XFERMODE(plus_) { return s + d; }
RGBA_XFERMODE(screen) { return s + d - s*d; }
RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); }

// Separable blend modes: color channels use these kernels, alpha uses srcover.
RGB_XFERMODE(colorburn) {
    return (d == da ).thenElse(d + s*inv(da),
           (s == 0.0f).thenElse(s + d*inv(sa),
                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(colordodge) {
    return (d == 0.0f).thenElse(d + s*inv(da),
           (s == sa ).thenElse(s + d*inv(sa),
                               sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
}
RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); }
RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; }
RGB_XFERMODE(hardlight) {
    return s*inv(da) + d*inv(sa)
         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
}
RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
// Overlay is hardlight with src and dst swapped.
RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
RGB_XFERMODE(softlight) {
    SkNf m = (da > 0.0f).thenElse(d / da, 0.0f),   // unpremultiplied dst, 0 if da == 0
         s2 = 2.0f*s,
         m4 = 4.0f*m;

    // The logic forks three ways:
    // 1. dark src?
    // 2. light src, dark dst?
    // 3. light src, light dst?
    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)), // Used in case 1.
         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m, // Used in case 2.
         liteDst = m.rsqrt().invert() - m, // Used in case 3.
         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3?
    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)?
}
565
// Replace alpha with the luminance of (r,g,b); zero the color channels.
STAGE(luminance_to_alpha, true) {
    a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
    r = g = b = 0;
}

// Apply a column-major 2x3 affine matrix to (r,g) — used for coordinates.
STAGE(matrix_2x3, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[2], m[4])),
         G = fma(r,m[1], fma(g,m[3], m[5]));
    r = R;
    g = G;
}

// Apply a column-major 3x4 matrix to (r,g,b).
STAGE(matrix_3x4, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))),
         G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))),
         B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11])));
    r = R;
    g = G;
    b = B;
}

// Apply a column-major 4x5 matrix to (r,g,b,a) — the SkColorMatrix layout.
STAGE(matrix_4x5, true) {
    auto m = (const float*)ctx;

    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
    auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
         G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
         B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
         A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
    r = R;
    g = G;
    b = B;
    a = A;
}
Mike Kleinaebfb452016-10-25 10:27:33 -0400606
Mike Kleinc01e7df2016-11-17 16:27:10 -0500607STAGE(matrix_perspective, true) {
608 // N.B. unlike the matrix_NxM stages, this takes a row-major matrix.
609 auto m = (const float*)ctx;
610
611 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
612 auto R = fma(r,m[0], fma(g,m[1], m[2])),
613 G = fma(r,m[3], fma(g,m[4], m[5])),
614 Z = fma(r,m[6], fma(g,m[7], m[8]));
615 r = R * Z.invert();
616 g = G * Z.invert();
617}
618
619
// Apply a 7-parameter transfer function to each lane:
// v <= D ? E*v + F : (A*v + B)^G + C.  Evaluated scalar-by-scalar because
// unconstrained powf() doesn't vectorize well.
SI SkNf parametric(const SkNf& v, const SkColorSpaceTransferFn& p) {
    float result[N]; // Unconstrained powf() doesn't vectorize well...
    for (int i = 0; i < N; i++) {
        float s = v[i];
        result[i] = (s <= p.fD) ? p.fE * s + p.fF
                                : powf(s * p.fA + p.fB, p.fG) + p.fC;
    }
    return SkNf::Load(result);
}

// Per-channel parametric transfer-function stages; ctx is an SkColorSpaceTransferFn.
STAGE(parametric_r, true) {
    r = parametric(r, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_g, true) {
    g = parametric(g, *(const SkColorSpaceTransferFn*)ctx);
}
STAGE(parametric_b, true) {
    b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
}

// Apply a tabulated transfer function, interpolating between table entries.
SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {
    float result[N];
    for (int i = 0; i < N; i++) {
        result[i] = interp_lut(v[i], table.fData, table.fSize);
    }
    return SkNf::Load(result);
}

// Per-channel table transfer-function stages; ctx is an SkTableTransferFn.
STAGE(table_r, true) {
    r = table(r, *(const SkTableTransferFn*)ctx);
}
STAGE(table_g, true) {
    g = table(g, *(const SkTableTransferFn*)ctx);
}
STAGE(table_b, true) {
    b = table(b, *(const SkTableTransferFn*)ctx);
}
657
// Run (r,g,b) through a 3D color lookup table, lane by lane.
STAGE(color_lookup_table, true) {
    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
    float rgb[3];
    float result[3][N];
    for (int i = 0; i < N; ++i) {
        rgb[0] = r[i];
        rgb[1] = g[i];
        rgb[2] = b[i];
        colorLUT->interp3D(rgb, rgb);
        result[0][i] = rgb[0];
        result[1][i] = rgb[1];
        result[2][i] = rgb[2];
    }
    r = SkNf::Load(result[0]);
    g = SkNf::Load(result[1]);
    b = SkNf::Load(result[2]);
}

// Convert CIELAB (packed into r,g,b as L/100, (a+128)/255, (b+128)/255)
// to XYZ under the D50 illuminant, using the standard piecewise inverse
// f-function with the 0.008856 (= (6/29)^3) threshold.
STAGE(lab_to_xyz, true) {
    const auto lab_l = r * 100.0f;
    const auto lab_a = g * 255.0f - 128.0f;
    const auto lab_b = b * 255.0f - 128.0f;
    auto Y = (lab_l + 16.0f) * (1/116.0f);
    auto X = lab_a * (1/500.0f) + Y;
    auto Z = Y - (lab_b * (1/200.0f));

    const auto X3 = X*X*X;
    X = (X3 > 0.008856f).thenElse(X3, (X - (16/116.0f)) * (1/7.787f));
    const auto Y3 = Y*Y*Y;
    Y = (Y3 > 0.008856f).thenElse(Y3, (Y - (16/116.0f)) * (1/7.787f));
    const auto Z3 = Z*Z*Z;
    Z = (Z3 > 0.008856f).thenElse(Z3, (Z - (16/116.0f)) * (1/7.787f));

    // adjust to D50 illuminant
    X *= 0.96422f;
    Y *= 1.00000f;
    Z *= 0.82521f;

    r = X;
    g = Y;
    b = Z;
}
700
// Debug check: every lane must land in [0, limit) after tiling.
SI SkNf assert_in_tile(const SkNf& v, float limit) {
    for (int i = 0; i < N; i++) {
        SkASSERT(0 <= v[i] && v[i] < limit);
    }
    return v;
}

// Clamp tiling: pin v into [0, limit); limit - 0.5f keeps the value strictly
// below limit after later rounding.
SI SkNf clamp(const SkNf& v, float limit) {
    SkNf result = SkNf::Max(0, SkNf::Min(v, limit - 0.5f));
    return assert_in_tile(result, limit);
}

// Repeat tiling: v mod limit, kept strictly below limit.
SI SkNf repeat(const SkNf& v, float limit) {
    SkNf result = v - (v/limit).floor()*limit;
    // For small negative v, (v/limit).floor()*limit can dominate v in the subtraction,
    // which leaves result == limit. We want result < limit, so clamp it one ULP.
    result = SkNf::Min(result, nextafterf(limit, 0));
    return assert_in_tile(result, limit);
}

// Mirror tiling: reflect v into [0, limit) with period 2*limit.
SI SkNf mirror(const SkNf& v, float l/*imit*/) {
    SkNf result = ((v - l) - ((v - l) / (2*l)).floor()*(2*l) - l).abs();
    // Same deal as repeat.
    result = SkNf::Min(result, nextafterf(l, 0));
    return assert_in_tile(result, l);
}

// Tiling stages: ctx is the integer tile dimension (converted to float).
// r carries x-coordinates, g carries y-coordinates.
STAGE(clamp_x, true) { r = clamp (r, *(const int*)ctx); }
STAGE(clamp_y, true) { g = clamp (g, *(const int*)ctx); }
STAGE(repeat_x, true) { r = repeat(r, *(const int*)ctx); }
STAGE(repeat_y, true) { g = repeat(g, *(const int*)ctx); }
STAGE(mirror_x, true) { r = mirror(r, *(const int*)ctx); }
STAGE(mirror_y, true) { g = mirror(g, *(const int*)ctx); }
Mike Klein06a65e22016-11-17 12:39:09 -0500734
// Bilinear sampling corner stages.  top_left runs first: it saves the sample
// coordinates into the context, shifts (r,g) to the top-left neighbor, and
// puts that corner's bilinear weight in b.  The other three stages reload the
// saved coordinates, offset to their corner, and compute their weight.
STAGE(top_left, true) {
    auto sc = (SkImageShaderContext*)ctx;

    r.store(sc->x);
    g.store(sc->y);

    r -= 0.5f;
    g -= 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * (1.0f - fy);
};

STAGE(top_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) - 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * (1.0f - fy);
};

STAGE(bottom_left, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) - 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = (1.0f - fx) * fy;
};

STAGE(bottom_right, true) {
    auto sc = (const SkImageShaderContext*)ctx;

    r = SkNf::Load(sc->x) + 0.5f;
    g = SkNf::Load(sc->y) + 0.5f;

    auto fx = r - r.floor(),
         fy = g - g.floor();
    b = fx * fy;
};
781
// Convert (x,y) sample coordinates into per-lane pixel offsets and set *ptr
// to the base of the image's pixels (from the SkImageShaderContext in ctx).
template <typename T>
SI SkNi offset_and_ptr(T** ptr, const void* ctx, const SkNf& x, const SkNf& y) {
    auto sc = (const SkImageShaderContext*)ctx;

    SkNi ix = SkNx_cast<int>(x),
         iy = SkNx_cast<int>(y);
    SkNi offset = iy*sc->stride + ix;

    *ptr = (const T*)sc->pixels;
    return offset;
}

// Gather up to N elements of src at the given per-lane offsets; lanes past
// the tail are zeroed (tail == 0 means all N lanes are valid).
template <typename T>
SI void gather(T (&dst)[N], const T* src, const SkNi& offset, size_t tail) {
    size_t n = tail ? tail : N;
    for (size_t i = 0; i < n; i++) { dst[i] = src[offset[i]]; }
    for (size_t i = n; i < N; i++) { dst[i] = 0; }
}

// Unimplemented accumulation stages for A8 and indexed-8 sources.
STAGE(accum_a8, true) {} // TODO

STAGE(accum_i8, true) {} // TODO
STAGE(accum_i8_srgb, true) {} // TODO
805
// Accumulation stages: gather pixels at (r,g), weight them by the bilinear
// coefficient in b, and accumulate into the dst registers.  The *_srgb
// variants additionally convert the gathered color channels to linear.

// Gather 8-bit gray, treat it as linear, accumulate weighted into dr..da.
STAGE(accum_g8, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    gather(px, p, offset, tail);

    SkNf gray = SkNx_cast<float>(SkNb::Load(px)) * (1/255.0f);

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}
// Same as accum_g8, but sRGB-decode the gray value first.
STAGE(accum_g8_srgb, true) {
    const uint8_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint8_t px[N];
    gather(px, p, offset, tail);

    SkNf gray = sk_linear_from_srgb_math(SkNx_cast<int>(SkNb::Load(px)));

    SkNf scale = b;
    dr += scale * gray;
    dg += scale * gray;
    db += scale * gray;
    da += scale;
}

// Gather 565 pixels (opaque, so alpha accumulates just the weight).
STAGE(accum_565, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale;
}
// Same as accum_565, but sRGB-decode the color channels first.
STAGE(accum_565_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B;
    from_565(SkNh::Load(px), &R, &G, &B);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale;
}
869
// Gather 4444 pixels and accumulate, weighted by the coefficient in b.
STAGE(accum_4444, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale * A;
}
// Same as accum_4444, but sRGB-decode the color channels (alpha stays linear).
STAGE(accum_4444_srgb, true) {
    const uint16_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint16_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_4444(SkNh::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale * A;
}

// Gather 8888 pixels and accumulate, weighted by the coefficient in b.
STAGE(accum_8888, true) {
    const uint32_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint32_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_8888(SkNu::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * R;
    dg += scale * G;
    db += scale * B;
    da += scale * A;
}
// Same as accum_8888, but sRGB-decode the color channels (alpha stays linear).
STAGE(accum_8888_srgb, true) {
    const uint32_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    uint32_t px[N];
    gather(px, p, offset, tail);

    SkNf R,G,B,A;
    from_8888(SkNu::Load(px), &R, &G, &B, &A);

    SkNf scale = b;
    dr += scale * sk_linear_from_srgb_math(R);
    dg += scale * sk_linear_from_srgb_math(G);
    db += scale * sk_linear_from_srgb_math(B);
    da += scale * A;
}
Mike Kleincb2c12b2016-11-22 13:22:48 -0500935
// Gather f16 (four packed half-floats per 64-bit pixel) texels at the
// indices derived from (r,g) and accumulate them into (dr,dg,db,da),
// weighted by b.
STAGE(accum_f16, true) {
    const uint64_t* p;
    SkNi offset = offset_and_ptr(&p, ctx, r, g);

    // f16 -> f32 conversion works best with tightly packed f16s,
    // so we gather each component rather than using gather().
    uint16_t R[N], G[N], B[N], A[N];
    size_t n = tail ? tail : N;   // tail == 0 means a full batch of N pixels.
    for (size_t i = 0; i < n; i++) {
        // Unpack the four 16-bit channels from each 64-bit pixel.
        uint64_t rgba = p[offset[i]];
        R[i] = rgba >> 0;
        G[i] = rgba >> 16;
        B[i] = rgba >> 32;
        A[i] = rgba >> 48;
    }
    // Zero-fill the slack lanes so the half->float conversion below reads
    // initialized data (and those lanes contribute nothing).
    for (size_t i = n; i < N; i++) {
        R[i] = G[i] = B[i] = A[i] = 0;
    }
    SkNf scale = b;
    dr += scale * SkHalfToFloat_finite_ftz(SkNh::Load(R));
    dg += scale * SkHalfToFloat_finite_ftz(SkNh::Load(G));
    db += scale * SkHalfToFloat_finite_ftz(SkNh::Load(B));
    da += scale * SkHalfToFloat_finite_ftz(SkNh::Load(A));
}
960
Mike Klein06a65e22016-11-17 12:39:09 -0500961
Mike Kleinaebfb452016-10-25 10:27:33 -0400962template <typename Fn>
963SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
964 switch (st) {
965 #define M(stage) case SkRasterPipeline::stage: return stage;
966 SK_RASTER_PIPELINE_STAGES(M)
967 #undef M
968 }
969 SkASSERT(false);
970 return just_return;
971}
Mike Klein9161ef02016-10-04 14:03:27 -0400972
Mike Kleinbaaf8ad2016-09-29 09:04:15 -0400973namespace SK_OPTS_NS {
974
Mike Kleinad48a702016-11-07 17:16:21 -0500975 struct Memset16 {
976 uint16_t** dst;
977 uint16_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -0500978 void operator()(size_t x, size_t, size_t n) { sk_memset16(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -0500979 };
980
981 struct Memset32 {
982 uint32_t** dst;
983 uint32_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -0500984 void operator()(size_t x, size_t, size_t n) { sk_memset32(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -0500985 };
986
987 struct Memset64 {
988 uint64_t** dst;
989 uint64_t val;
Mike Kleinaf49b192016-11-15 08:52:04 -0500990 void operator()(size_t x, size_t, size_t n) { sk_memset64(*dst + x, val, n); }
Mike Kleinad48a702016-11-07 17:16:21 -0500991 };
992
Mike Kleinaf49b192016-11-15 08:52:04 -0500993 SI std::function<void(size_t, size_t, size_t)>
994 compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
mtklein125b2aa2016-11-04 13:41:34 -0700995 if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
996 SkPM4f src = *(const SkPM4f*)stages[0].ctx;
997 void* dst = stages[1].ctx;
998 switch (stages[1].stage) {
Mike Kleinad48a702016-11-07 17:16:21 -0500999 case SkRasterPipeline::store_565:
1000 return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f,
1001 src.g() * SK_G16_MASK + 0.5f,
1002 src.b() * SK_B16_MASK + 0.5f)};
1003 case SkRasterPipeline::store_srgb:
1004 return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())};
mtklein125b2aa2016-11-04 13:41:34 -07001005
Mike Kleinad48a702016-11-07 17:16:21 -05001006 case SkRasterPipeline::store_f16:
1007 return Memset64{(uint64_t**)dst, src.toF16()};
mtklein125b2aa2016-11-04 13:41:34 -07001008
1009 default: break;
1010 }
1011 }
1012
Mike Kleine9f74b82016-10-25 13:31:21 -04001013 struct Compiled {
1014 Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
1015 if (nstages == 0) {
1016 return;
1017 }
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001018
Mike Kleine9f74b82016-10-25 13:31:21 -04001019 fBodyStart = enum_to_Fn<Body>(stages[0].stage);
1020 fTailStart = enum_to_Fn<Tail>(stages[0].stage);
1021 for (int i = 0; i < nstages-1; i++) {
1022 fBody[i].next = enum_to_Fn<Body>(stages[i+1].stage);
1023 fTail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
1024 fBody[i].ctx = fTail[i].ctx = stages[i].ctx;
1025 }
1026 fBody[nstages-1].next = just_return;
1027 fTail[nstages-1].next = just_return;
1028 fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
Mike Klein050ffa92016-10-20 16:20:46 -04001029 }
Mike Kleinaebfb452016-10-25 10:27:33 -04001030
Mike Kleinaf49b192016-11-15 08:52:04 -05001031 void operator()(size_t x, size_t y, size_t n) {
Mike Kleinaf49b192016-11-15 08:52:04 -05001032 float dx[] = { 0,1,2,3,4,5,6,7 };
Mike Klein0f91ea42016-11-15 10:31:38 -05001033 SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
Mike Kleinf7f883b2016-11-21 15:09:45 -05001034 Y = SkNf(y) + 0.5f,
1035 _0 = SkNf(0),
1036 _1 = SkNf(1);
Mike Kleinaf49b192016-11-15 08:52:04 -05001037
Mike Kleine9f74b82016-10-25 13:31:21 -04001038 while (n >= N) {
Mike Kleinf7f883b2016-11-21 15:09:45 -05001039 fBodyStart(fBody, x, X,Y,_1,_0, _0,_0,_0,_0);
Mike Klein0f91ea42016-11-15 10:31:38 -05001040 X += (float)N;
Mike Kleine9f74b82016-10-25 13:31:21 -04001041 x += N;
1042 n -= N;
1043 }
1044 if (n) {
Mike Kleinf7f883b2016-11-21 15:09:45 -05001045 fTailStart(fTail, x,n, X,Y,_1,_0, _0,_0,_0,_0);
Mike Kleine9f74b82016-10-25 13:31:21 -04001046 }
Mike Klein050ffa92016-10-20 16:20:46 -04001047 }
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001048
Mike Kleine9f74b82016-10-25 13:31:21 -04001049 Body fBodyStart = just_return;
1050 Tail fTailStart = just_return;
1051
1052 BodyStage fBody[SkRasterPipeline::kMaxStages];
1053 TailStage fTail[SkRasterPipeline::kMaxStages];
1054
1055 } fn { stages, nstages };
1056 return fn;
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001057 }
1058
Mike Kleinaebfb452016-10-25 10:27:33 -04001059} // namespace SK_OPTS_NS
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001060
Mike Klein04adfda2016-10-12 09:52:55 -04001061#undef SI
1062#undef STAGE
1063#undef RGBA_XFERMODE
1064#undef RGB_XFERMODE
Mike Klein9161ef02016-10-04 14:03:27 -04001065
Mike Kleinbaaf8ad2016-09-29 09:04:15 -04001066#endif//SkRasterPipeline_opts_DEFINED