/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order.  This lets
// our golden tests work portably.  In general there's no reason to fear
// nesting calls to Builder routines.
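//
// A rough sketch of how these builders are typically driven, assuming skvm's
// Builder::done() / Program::eval() interface (npixels, src, and dst below are
// placeholder names for the caller's pixel count and pixel pointers):
//
//     skvm::Program program = SrcoverBuilder_I32_SWAR{}.done();
//     program.eval(npixels, src, dst);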

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
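    // Map a byte in [0,255] to a float in [0.0, 1.0].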
    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };

    auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        skvm::Arg ptr;
        switch (fmt) {
            case Fmt::A8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                ptr = varying<int>();
                skvm::I32 rgba = load32(ptr);
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
            } break;
        }
        return ptr;   // Returned so the caller can keep the dst pointer for the final store.
    };

    skvm::F32 r,g,b,a;
    (void)load(srcFmt, &r,&g,&b,&a);   // The src pointer isn't needed after loading.

    skvm::F32 dr,dg,db,da;
    skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);

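    // Srcover, per channel (alpha included): out = src + dst*(1 - src_alpha).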
    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

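    // Scale back up to [0,255]; adding 0.5 makes the truncating to_i32 round to nearest.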
    auto f32_to_byte = [&](skvm::F32 f32) {
        skvm::F32 _255 = splat(255.0f),
                  _0_5 = splat(0.5f);
        return to_i32(mad(f32, _255, _0_5));
    };
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

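        // Collapse color to a single gray channel using BT.709 luma coefficients.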
        case Fmt::G8: {
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

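            // pack(x,y,n) behaves like x | (y << n), so these three packs
            // reassemble the bytes into a 0xAABBGGRR word.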
            R = pack(R, G, 8);
            B = pack(B, A, 8);
            R = pack(R, B, 16);

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
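        // extract(v, n, mask) reads as (v >> n) & mask, one 8-bit channel at a time.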
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256
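    //
    // Spot check: with d=0x80 and a=0x40, the exact (0x80*0xbf + 127)/255
    // and the approximate (0x80*0xc0)>>8 both come out to 0x60.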

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
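        // bytes() is a byte shuffle: each hex nibble of the control word picks a
        // 1-indexed byte of the input (0 picks zero), so 0x0002 pulls g down into
        // the low byte and 0x0003 pulls b.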
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);
        *b = bytes  (rgba, 0x0003);
        *a = shr    (rgba, 24);
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256

    // We're doing 8x8 bit multiplies in 32-bit lanes.
    // Since the inputs and results both fit in 16 bits,
    // we can use mul_16x2, which tends to be faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul_16x2(dr, invA), 8));
    g = add(g, shr(mul_16x2(dg, invA), 8));
    b = add(b, shr(mul_16x2(db, invA), 8));
    a = add(a, shr(mul_16x2(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    // The s += d*invA adds won't overflow,
    // so we don't have to unpack s beyond grabbing the alpha channel.
    skvm::I32 s = load32(src),
            ax2 = bytes(s, 0x0404);  // rgba -> a0a0

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);
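    // (0x01000100 is 256 in each 16-bit lane, so invAx2 holds 256-a twice.)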
| Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 187 |  | 
| Mike Klein | 4c4945a | 2019-06-13 15:51:39 -0500 | [diff] [blame] | 188 |     skvm::I32 d  = load32(dst), | 
 | 189 |               rb = bit_and (d, splat(0x00ff00ff)), | 
 | 190 |               ga = shr_16x2(d, 8); | 
| Mike Klein | 57cb5ba | 2019-06-13 12:51:25 -0500 | [diff] [blame] | 191 |  | 
| Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 192 |     rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low lane. | 
 | 193 |     ga =          mul_16x2(ga, invAx2);      // Keep the high 8 bits up high... | 
| Mike Klein | 2b7b2a2 | 2019-06-23 20:35:28 -0400 | [diff] [blame] | 194 |     ga = bit_clear(ga, splat(0x00ff00ff));     // ...and mask off the low bits. | 
| Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 195 |  | 
| Mike Klein | 7f061fb | 2019-06-13 13:12:38 -0500 | [diff] [blame] | 196 |     store32(dst, add(s, bit_or(rb, ga))); | 
| Mike Klein | 7b7077c | 2019-06-03 17:10:59 -0500 | [diff] [blame] | 197 | } |