Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2019 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "tools/SkVMBuilders.h" |
| 9 | |
| 10 | // Some parts of this builder code are written less fluently than possible, |
| 11 | // to avoid any ambiguity of function argument evaluation order. This lets |
| 12 | // our golden tests work portably. In general there's no reason to fear |
| 13 | // nesting calls to Builder routines. |
| 14 | |
| 15 | SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) { |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 16 | auto byte_to_f32 = [&](skvm::I32 byte) { |
| 17 | skvm::F32 _1_255 = splat(1/255.0f); |
| 18 | return mul(_1_255, to_f32(byte)); |
| 19 | }; |
| 20 | |
Mike Klein | 2616efd | 2019-07-15 10:04:08 -0500 | [diff] [blame] | 21 | auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) { |
| 22 | skvm::Arg ptr; |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 23 | switch (fmt) { |
| 24 | case Fmt::A8: { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 25 | ptr = varying<uint8_t>(); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 26 | *r = *g = *b = splat(0.0f); |
| 27 | *a = byte_to_f32(load8(ptr)); |
| 28 | } break; |
| 29 | |
| 30 | case Fmt::G8: { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 31 | ptr = varying<uint8_t>(); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 32 | *r = *g = *b = byte_to_f32(load8(ptr)); |
| 33 | *a = splat(1.0f); |
| 34 | } break; |
| 35 | |
| 36 | case Fmt::RGBA_8888: { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 37 | ptr = varying<int>(); |
Mike Klein | 9656dce | 2019-06-04 11:33:25 -0500 | [diff] [blame] | 38 | skvm::I32 rgba = load32(ptr); |
Mike Klein | a630732 | 2019-06-07 15:44:26 -0500 | [diff] [blame] | 39 | *r = byte_to_f32(extract(rgba, 0, splat(0xff))); |
| 40 | *g = byte_to_f32(extract(rgba, 8, splat(0xff))); |
| 41 | *b = byte_to_f32(extract(rgba, 16, splat(0xff))); |
| 42 | *a = byte_to_f32(extract(rgba, 24, splat(0xff))); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 43 | } break; |
| 44 | } |
Mike Klein | 2616efd | 2019-07-15 10:04:08 -0500 | [diff] [blame] | 45 | return ptr; |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 46 | }; |
| 47 | |
| 48 | skvm::F32 r,g,b,a; |
Mike Klein | 2616efd | 2019-07-15 10:04:08 -0500 | [diff] [blame] | 49 | (void)load(srcFmt, &r,&g,&b,&a); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 50 | |
| 51 | skvm::F32 dr,dg,db,da; |
Mike Klein | 2616efd | 2019-07-15 10:04:08 -0500 | [diff] [blame] | 52 | skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 53 | |
| 54 | skvm::F32 invA = sub(splat(1.0f), a); |
| 55 | r = mad(dr, invA, r); |
| 56 | g = mad(dg, invA, g); |
| 57 | b = mad(db, invA, b); |
| 58 | a = mad(da, invA, a); |
| 59 | |
| 60 | auto f32_to_byte = [&](skvm::F32 f32) { |
| 61 | skvm::F32 _255 = splat(255.0f), |
| 62 | _0_5 = splat(0.5f); |
| 63 | return to_i32(mad(f32, _255, _0_5)); |
| 64 | }; |
| 65 | switch (dstFmt) { |
| 66 | case Fmt::A8: { |
| 67 | store8(dst, f32_to_byte(a)); |
| 68 | } break; |
| 69 | |
| 70 | case Fmt::G8: { |
| 71 | skvm::F32 _2126 = splat(0.2126f), |
| 72 | _7152 = splat(0.7152f), |
| 73 | _0722 = splat(0.0722f); |
| 74 | store8(dst, f32_to_byte(mad(r, _2126, |
| 75 | mad(g, _7152, |
| 76 | mul(b, _0722))))); |
| 77 | } break; |
| 78 | |
| 79 | case Fmt::RGBA_8888: { |
Mike Klein | 1665aaa | 2019-06-04 10:41:49 -0500 | [diff] [blame] | 80 | skvm::I32 R = f32_to_byte(r), |
| 81 | G = f32_to_byte(g), |
| 82 | B = f32_to_byte(b), |
| 83 | A = f32_to_byte(a); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 84 | |
Mike Klein | 1665aaa | 2019-06-04 10:41:49 -0500 | [diff] [blame] | 85 | R = pack(R, G, 8); |
| 86 | B = pack(B, A, 8); |
| 87 | R = pack(R, B, 16); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 88 | |
| 89 | store32(dst, R); |
| 90 | } break; |
| 91 | } |
| 92 | } |
| 93 | |
Mike Klein | 397fc88 | 2019-06-20 11:37:10 -0500 | [diff] [blame] | 94 | SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 95 | skvm::Arg src = varying<int>(), |
| 96 | dst = varying<int>(); |
Mike Klein | 397fc88 | 2019-06-20 11:37:10 -0500 | [diff] [blame] | 97 | |
| 98 | auto load = [&](skvm::Arg ptr, |
| 99 | skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) { |
| 100 | skvm::I32 rgba = load32(ptr); |
| 101 | *r = extract(rgba, 0, splat(0xff)); |
| 102 | *g = extract(rgba, 8, splat(0xff)); |
| 103 | *b = extract(rgba, 16, splat(0xff)); |
| 104 | *a = extract(rgba, 24, splat(0xff)); |
| 105 | }; |
| 106 | |
| 107 | skvm::I32 r,g,b,a; |
| 108 | load(src, &r,&g,&b,&a); |
| 109 | |
| 110 | skvm::I32 dr,dg,db,da; |
| 111 | load(dst, &dr,&dg,&db,&da); |
| 112 | |
| 113 | // (xy + x)/256 is a good approximation of (xy + 127)/255 |
| 114 | // |
| 115 | // == (d*(255-a) + d)/256 |
| 116 | // == (d*(255-a+1) )/256 |
| 117 | // == (d*(256-a ) )/256 |
| 118 | |
| 119 | skvm::I32 invA = sub(splat(256), a); |
| 120 | r = add(r, shr(mul(dr, invA), 8)); |
| 121 | g = add(g, shr(mul(dg, invA), 8)); |
| 122 | b = add(b, shr(mul(db, invA), 8)); |
| 123 | a = add(a, shr(mul(da, invA), 8)); |
| 124 | |
| 125 | r = pack(r, g, 8); |
| 126 | b = pack(b, a, 8); |
| 127 | r = pack(r, b, 16); |
| 128 | store32(dst, r); |
| 129 | } |
| 130 | |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 131 | SrcoverBuilder_I32::SrcoverBuilder_I32() { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 132 | skvm::Arg src = varying<int>(), |
| 133 | dst = varying<int>(); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 134 | |
| 135 | auto load = [&](skvm::Arg ptr, |
| 136 | skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) { |
Mike Klein | 9656dce | 2019-06-04 11:33:25 -0500 | [diff] [blame] | 137 | skvm::I32 rgba = load32(ptr); |
Mike Klein | 342b1b2 | 2019-06-13 16:43:18 -0500 | [diff] [blame] | 138 | *r = bit_and(rgba, splat(0xff)); |
| 139 | *g = bytes (rgba, 0x0002); |
| 140 | *b = bytes (rgba, 0x0003); |
Mike Klein | 3538908 | 2019-06-13 11:29:26 -0500 | [diff] [blame] | 141 | *a = shr (rgba, 24); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 142 | }; |
| 143 | |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 144 | skvm::I32 r,g,b,a; |
| 145 | load(src, &r,&g,&b,&a); |
| 146 | |
| 147 | skvm::I32 dr,dg,db,da; |
| 148 | load(dst, &dr,&dg,&db,&da); |
| 149 | |
Mike Klein | 821f5e8 | 2019-06-13 10:56:51 -0500 | [diff] [blame] | 150 | // (xy + x)/256 is a good approximation of (xy + 127)/255 |
| 151 | // |
| 152 | // == (d*(255-a) + d)/256 |
| 153 | // == (d*(255-a+1) )/256 |
| 154 | // == (d*(256-a ) )/256 |
| 155 | |
Mike Klein | 3538908 | 2019-06-13 11:29:26 -0500 | [diff] [blame] | 156 | // We're doing 8x8 bit multiplies in 32-bit lanes. |
| 157 | // Since the inputs and results both fit in 16 bits, |
| 158 | // we can use mul_16x2, which tends to be faster than mul. |
| 159 | // |
| 160 | // (The top 2 zero bytes of the inputs will also multiply |
| 161 | // with each other to produce zero... perfect.) |
| 162 | |
Mike Klein | 821f5e8 | 2019-06-13 10:56:51 -0500 | [diff] [blame] | 163 | skvm::I32 invA = sub(splat(256), a); |
Mike Klein | 3538908 | 2019-06-13 11:29:26 -0500 | [diff] [blame] | 164 | r = add(r, shr(mul_16x2(dr, invA), 8)); |
| 165 | g = add(g, shr(mul_16x2(dg, invA), 8)); |
| 166 | b = add(b, shr(mul_16x2(db, invA), 8)); |
| 167 | a = add(a, shr(mul_16x2(da, invA), 8)); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 168 | |
Mike Klein | 1665aaa | 2019-06-04 10:41:49 -0500 | [diff] [blame] | 169 | r = pack(r, g, 8); |
| 170 | b = pack(b, a, 8); |
| 171 | r = pack(r, b, 16); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 172 | store32(dst, r); |
| 173 | } |
| 174 | |
| 175 | SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() { |
Mike Klein | 5591fdf | 2019-07-30 09:44:30 -0500 | [diff] [blame] | 176 | skvm::Arg src = varying<int>(), |
| 177 | dst = varying<int>(); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 178 | |
Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 179 | // The s += d*invA adds won't overflow, |
| 180 | // so we don't have to unpack s beyond grabbing the alpha channel. |
| 181 | skvm::I32 s = load32(src), |
Mike Klein | 342b1b2 | 2019-06-13 16:43:18 -0500 | [diff] [blame] | 182 | ax2 = bytes(s, 0x0404); // rgba -> a0a0 |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 183 | |
Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 184 | // We'll use the same approximation math as above, this time making sure to |
| 185 | // use both i16 multiplies to our benefit, one for r/g, the other for b/a. |
Mike Klein | 342b1b2 | 2019-06-13 16:43:18 -0500 | [diff] [blame] | 186 | skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 187 | |
Mike Klein | 4c4945a | 2019-06-13 15:51:39 -0500 | [diff] [blame] | 188 | skvm::I32 d = load32(dst), |
| 189 | rb = bit_and (d, splat(0x00ff00ff)), |
| 190 | ga = shr_16x2(d, 8); |
Mike Klein | 57cb5ba | 2019-06-13 12:51:25 -0500 | [diff] [blame] | 191 | |
Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 192 | rb = shr_16x2(mul_16x2(rb, invAx2), 8); // Put the high 8 bits back in the low lane. |
| 193 | ga = mul_16x2(ga, invAx2); // Keep the high 8 bits up high... |
Mike Klein | 2b7b2a2 | 2019-06-23 20:35:28 -0400 | [diff] [blame] | 194 | ga = bit_clear(ga, splat(0x00ff00ff)); // ...and mask off the low bits. |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 195 | |
Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 196 | store32(dst, add(s, bit_or(rb, ga))); |
Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 197 | } |