/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order.  This lets
// our golden tests work portably.  In general there's no reason to fear
// nesting calls to Builder routines.

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };

    auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        skvm::Arg ptr;
        switch (fmt) {
            case Fmt::A8: {
                ptr = arg<uint8_t>();
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                ptr = arg<uint8_t>();
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                ptr = arg<int>();
                skvm::I32 rgba = load32(ptr);
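                // extract(x, bits, z) == bit_and(shr(x, bits), z),
                // pulling each 8-bit channel down into the low byte.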
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
            } break;
        }
        return ptr;
    };

    skvm::F32 r,g,b,a;
    (void)load(srcFmt, &r,&g,&b,&a);

    skvm::F32 dr,dg,db,da;
    skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);

    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

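    // Scale back to [0,255] and round to nearest by adding 0.5 before truncating.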
    auto f32_to_byte = [&](skvm::F32 f32) {
        skvm::F32 _255 = splat(255.0f),
                  _0_5 = splat(0.5f);
        return to_i32(mad(f32, _255, _0_5));
    };
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

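        // Convert to luminance with the BT.709 luma coefficients.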
        case Fmt::G8: {
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

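            // pack(x, y, bits) == bit_or(x, shl(y, bits)), reassembling the
            // four bytes as R | G<<8 | B<<16 | A<<24.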
            R = pack(R, G,  8);
            B = pack(B, A,  8);
            R = pack(R, B, 16);

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = arg<int>(),
              dst = arg<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (x*y + x)/256 is a good approximation of (x*y + 127)/255.
    // With x = d and y = 255-a, that approximation of d*(255-a)/255 is
    //
    //    (d*(255-a) + d)/256
    // == (d*(255-a+1)    )/256
    // == (d*(256-a  )    )/256

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));

    r = pack(r, g,  8);
    b = pack(b, a,  8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = arg<int>(),
              dst = arg<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
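        // bytes() is a byte shuffle: each hex digit of the control selects a
        // source byte (1-4, counting up from the low byte) and 0 writes a zero,
        // so 0x0002 pulls g down into the low byte and 0x0003 pulls b.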
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);
        *b = bytes  (rgba, 0x0003);
        *a = shr    (rgba, 24);
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (x*y + x)/256 is a good approximation of (x*y + 127)/255.
    // With x = d and y = 255-a, that approximation of d*(255-a)/255 is
    //
    //    (d*(255-a) + d)/256
    // == (d*(255-a+1)    )/256
    // == (d*(256-a  )    )/256

    // We're doing 8x8 bit multiplies in 32-bit lanes.
    // Since the inputs and results both fit in 16 bits,
    // we can use mul_16x2, which tends to be faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul_16x2(dr, invA), 8));
    g = add(g, shr(mul_16x2(dg, invA), 8));
    b = add(b, shr(mul_16x2(db, invA), 8));
    a = add(a, shr(mul_16x2(da, invA), 8));

    r = pack(r, g,  8);
    b = pack(b, a,  8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = arg<int>(),
              dst = arg<int>();

    // The s += d*invA adds won't overflow,
    // so we don't have to unpack s beyond grabbing the alpha channel.
    skvm::I32 s   = load32(src),
              ax2 = bytes(s, 0x0404);  // rgba -> a0a0

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
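    // splat(0x01000100) holds 256 in each 16-bit lane, so invAx2 is (256-a) twice.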
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);

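    // Split dst into two 16x2 halves: rb keeps r and b in the low byte of each
    // 16-bit lane, while ga shifts g and a down into the low bytes of their lanes.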
    skvm::I32 d  = load32(dst),
              rb = bit_and (d, splat(0x00ff00ff)),
              ga = shr_16x2(d, 8);

    rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low byte of each lane.
    ga =          mul_16x2(ga, invAx2);      // Keep the high 8 bits up high...
    ga = bit_clear(ga, splat(0x00ff00ff));   // ...and mask off the low bits.

    store32(dst, add(s, bit_or(rb, ga)));
}