/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order. This lets
// our golden tests work portably. In general there's no reason to fear
// nesting calls to Builder routines.
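//
// (C++ does not specify the order in which function arguments are evaluated,
// so nesting two Builder calls as arguments to a third could record their
// instructions in either order and change the golden instruction dump.)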

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };

    auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        skvm::Arg ptr;
        switch (fmt) {
            case Fmt::A8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                ptr = varying<int>();
                skvm::I32 rgba = load32(ptr);
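                // extract(rgba, n, splat(0xff)) amounts to (rgba >> n) & 0xff,
                // pulling one 8-bit channel out of the packed pixel.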
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
            } break;
        }
        return ptr;
    };

    skvm::F32 r,g,b,a;
    (void)load(srcFmt, &r,&g,&b,&a);

    skvm::F32 dr,dg,db,da;
    skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);

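    // Src-over, per channel: out = src + dst*(1 - srcAlpha), alpha included.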
    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

    auto f32_to_byte = [&](skvm::F32 f32) {
        return round(mul(f32, splat(255.0f)));
    };
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

        case Fmt::G8: {
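            // Rec. 709 luma coefficients.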
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

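            // pack(x, y, n) effectively ors x with y << n, so these three
            // packs rebuild the pixel as R | G<<8 | B<<16 | A<<24.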
            R = pack(R, G, 8);
            B = pack(B, A, 8);
            R = pack(R, B, 16);

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256
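    //
    // For example, with d = 0xff and a = 0x40:
    //   exact:  (255*191 + 127)/255 = 191
    //   approx: (255*(256-64)  )/256 = 191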

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);   // rgba -> g000
        *b = bytes  (rgba, 0x0003);   // rgba -> b000
        *a = shr    (rgba, 24);
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256

    // We're doing 8x8 bit multiplies in 32-bit lanes.
    // Since the inputs and results both fit in 16 bits,
    // we can use mul_16x2, which tends to be faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)
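    //
    // (The largest product here is 0xff * 0x100 = 0xff00, which still fits.)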

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul_16x2(dr, invA), 8));
    g = add(g, shr(mul_16x2(dg, invA), 8));
    b = add(b, shr(mul_16x2(db, invA), 8));
    a = add(a, shr(mul_16x2(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    // The s += d*invA adds won't overflow,
    // so we don't have to unpack s beyond grabbing the alpha channel.
    skvm::I32 s   = load32(src),
              ax2 = bytes(s, 0x0404);  // rgba -> a0a0
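    // (Each nibble of the bytes() control picks a 1-indexed source byte, with 0
    // producing a zero byte, so 0x0404 broadcasts alpha into the low byte of
    // each 16-bit half.)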

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);

    skvm::I32 d  = load32(dst),
              rb = bit_and (d, splat(0x00ff00ff)),
              ga = shr_16x2(d, 8);

    rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low lane.
    ga =          mul_16x2(ga, invAx2);      // Keep the high 8 bits up high...
    ga = bit_clear(ga, splat(0x00ff00ff));   // ...and mask off the low bits.
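
    // rb now holds (d.r*invA)/256 and (d.b*invA)/256 in the low bytes, and ga
    // holds (d.g*invA)/256 and (d.a*invA)/256 up in the high bytes, so or-ing
    // them together rebuilds d*invA as a full 8888 pixel for the final add.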
    store32(dst, add(s, bit_or(rb, ga)));
}
Mike Klein7b7077c2019-06-03 17:10:59 -0500195}