/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order. This lets
// our golden tests work portably. In general there's no reason to fear
// nesting calls to Builder routines.
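//
// (For example, in pack(load32(src), load32(dst), 8), C++ leaves it
// unspecified which load32() runs first, so the two loads could be issued
// in either order and the generated program could differ between compilers.)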

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
    skvm::Arg src = arg(0),
              dst = arg(1);

    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };
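    // (This maps 0 -> 0.0f and 255 -> 1.0f; conveniently, 255 * (1/255.0f)
    // rounds back to exactly 1.0f.)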

    auto load = [&](skvm::Arg ptr, Fmt fmt,
                    skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        switch (fmt) {
            case Fmt::A8: {
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                skvm::I32 rgba = load32(ptr);
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
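                // extract(x, bits, mask) computes (x >> bits) & mask,
                // so each call above isolates one 8-bit channel.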
            } break;
        }
    };

    skvm::F32 r,g,b,a;
    load(src, srcFmt, &r,&g,&b,&a);

    skvm::F32 dr,dg,db,da;
    load(dst, dstFmt, &dr,&dg,&db,&da);
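
    // These next four mads are standard premultiplied src-over,
    // out = src + dst*(1 - srcAlpha), applied per channel.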

    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

    auto f32_to_byte = [&](skvm::F32 f32) {
        skvm::F32 _255 = splat(255.0f),
                  _0_5 = splat(0.5f);
        return to_i32(mad(f32, _255, _0_5));
    };
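    // (to_i32() truncates, so scaling by 255 and adding 0.5 first rounds
    // each non-negative channel to its nearest byte value.)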
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

        case Fmt::G8: {
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
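            // (These are the BT.709 luma coefficients.)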
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

            R = pack(R, G,  8);
            B = pack(B, A,  8);
            R = pack(R, B, 16);
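            // pack(x, y, bits) is x | (y << bits), so these three packs
            // reassemble the channels as r,g,b,a in little-endian memory
            // order.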

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = arg(0),
              dst = arg(1);

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255,
    // here with x = d and y = (255-a):
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a)    )/256

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));
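    // Sanity check: a == 0 gives invA == 256, so dst passes through
    // (d*256 >> 8 == d); a == 255 gives invA == 1, so dst drops out
    // (d*1 >> 8 == 0).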

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = arg(0),
              dst = arg(1);

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);
        *b = bytes  (rgba, 0x0003);
        *a = shr    (rgba, 24);
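        // bytes() is a byte shuffle: each hex nibble of the control picks
        // a byte of the input (1-4, low to high; 0 means zero).  So 0x0002
        // pulls g down to the low byte, and 0x0003 does the same for b.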
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255,
    // here with x = d and y = (255-a):
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a)    )/256

    // We're doing 8-bit by 9-bit multiplies in 32-bit lanes: d is at most
    // 255, and invA at most 256.  Since the inputs and results all fit in
    // 16 bits (255*256 == 0xff00), we can use mul_16x2, which tends to be
    // faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)
161
Mike Klein821f5e82019-06-13 10:56:51 -0500162 skvm::I32 invA = sub(splat(256), a);
Mike Klein35389082019-06-13 11:29:26 -0500163 r = add(r, shr(mul_16x2(dr, invA), 8));
164 g = add(g, shr(mul_16x2(dg, invA), 8));
165 b = add(b, shr(mul_16x2(db, invA), 8));
166 a = add(a, shr(mul_16x2(da, invA), 8));
Mike Klein7b7077c2019-06-03 17:10:59 -0500167
Mike Klein1665aaa2019-06-04 10:41:49 -0500168 r = pack(r, g, 8);
169 b = pack(b, a, 8);
170 r = pack(r, b, 16);
Mike Klein7b7077c2019-06-03 17:10:59 -0500171 store32(dst, r);
172}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = arg(0),
              dst = arg(1);

    // The s += d*invA adds won't overflow: with premultiplied src each
    // result byte stays at most 255, so byte lanes never carry into their
    // neighbors.  That means we don't have to unpack s beyond grabbing
    // the alpha channel.
    skvm::I32 s = load32(src),
              ax2 = bytes(s, 0x0404);  // rgba -> a0a0

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);
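    // (Each 16-bit half is 0x0100 - a == 256 - a: the same invA as in the
    // builders above, two copies at once.)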

    skvm::I32 d  = load32(dst),
              rb = bit_and (d, splat(0x00ff00ff)),
              ga = shr_16x2(d, 8);
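    // rb holds r and b in the low byte of each 16-bit lane (0x00bb00rr);
    // ga holds g and a the same way (0x00aa00gg).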

    rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low lane.
    ga =          mul_16x2(ga, invAx2);      // Keep the high 8 bits up high...
    ga = bit_clear(ga, splat(0x00ff00ff));   // ...and mask off the low bits.
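
    // rb's results sit in the low byte of each 16-bit lane and ga's in the
    // high byte, so bit_or() knits them back into rgba order; per the note
    // above, adding s can't carry across byte lanes.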
    store32(dst, add(s, bit_or(rb, ga)));
}