Mike Klein | 68c50d0 | 2019-05-29 12:57:54 -0500 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2019 Google LLC |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "include/core/SkColorPriv.h" |
| 9 | #include "include/private/SkColorData.h" |
| 10 | #include "src/core/SkVM.h" |
| 11 | #include "tests/Test.h" |

#include <cstring>
| 12 | |
enum Fmt { A8, G8, RGBA_8888 };

// Returns a printable name for a pixel format.
// Any value that is not one of the three enumerators yields the empty string.
const char* fmt_name(Fmt fmt) {
    if (fmt == A8)        { return "A8"; }
    if (fmt == G8)        { return "G8"; }
    if (fmt == RGBA_8888) { return "RGBA_8888"; }
    return "";
}
| 22 | |
| 23 | // Here's a cute little trick that avoids the need to explicitly thread |
| 24 | // an skvm::Builder* through and make a lot of builder->foo() calls. |
| 25 | // Instead the builder becomes this, with this-> omitted for clarity. |
| 26 | // |
| 27 | // Think of this as |
| 28 | // static void srcover(skvm::Builder*, Fmt srcFmt, Fmt dstFmt) { ... } |
| 29 | // |
| 30 | // Some parts of this builder code are written less fluently than possible, |
| 31 | // to avoid any ambiguity of function argument evaluation order. This lets |
| 32 | // our golden tests (kExpected) work portably. In general there's no reason |
| 33 | // to fear nesting calls to Builder routines. |
| 34 | |
// SrcoverBuilder records, in its constructor, an skvm program that blends a
// source pixel over a destination pixel and stores the result back into the
// destination.
//   arg(0): source pixels, loaded according to srcFmt
//   arg(1): destination pixels, loaded and then overwritten according to dstFmt
// Each channel is converted to float and scaled by 1/255, combined as
// mad(dst, 1 - src_alpha, src), then scaled by 255 and converted back.
// NOTE(review): mad(x, y, z) is presumably x*y + z; confirm against skvm::Builder.
// The sequence of builder calls below is what the golden dumps in kExpected
// capture, so statements here should not be reordered casually.
| 35 | struct SrcoverBuilder : public skvm::Builder { |
| 36 | SrcoverBuilder(Fmt srcFmt, Fmt dstFmt) { |
| 37 | skvm::Arg src = arg(0), |
| 38 | dst = arg(1); |
| 39 | |
// Converts an integer value to float and multiplies it by 1/255.
| 40 | auto byte_to_f32 = [&](skvm::I32 byte) { |
| 41 | skvm::F32 _1_255 = splat(1/255.0f); |
| 42 | return mul(_1_255, to_f32(byte)); |
| 43 | }; |
| 44 | |
// Loads one pixel from ptr in the given format into float r, g, b, a.
| 45 | auto load = [&](skvm::Arg ptr, Fmt fmt, |
| 46 | skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) { |
| 47 | switch (fmt) { |
// Alpha-only: the color channels are zero and the byte is the alpha.
| 48 | case A8: { |
| 49 | *r = *g = *b = splat(0.0f); |
| 50 | *a = byte_to_f32(load8(ptr)); |
| 51 | } break; |
| 52 | |
// Gray: the single byte feeds r, g and b alike; alpha is 1.
| 53 | case G8: { |
| 54 | *r = *g = *b = byte_to_f32(load8(ptr)); |
| 55 | *a = splat(1.0f); |
| 56 | } break; |
| 57 | |
// 32-bit pixel: r is bits 0-7, g is bits 8-15, b is bits 16-23 (each masked
// with 255); a is the value shifted right by 24 with no mask applied.
// NOTE(review): presumably shr is a logical shift, which is why the top byte
// needs no mask; confirm against skvm::Builder.
| 58 | case RGBA_8888: { |
| 59 | skvm::I32 rgba = load32(ptr), |
| 60 | _255 = splat(255); |
| 61 | *r = byte_to_f32(bit_and( rgba , _255)); |
| 62 | *g = byte_to_f32(bit_and(shr(rgba, 8), _255)); |
| 63 | *b = byte_to_f32(bit_and(shr(rgba, 16), _255)); |
| 64 | *a = byte_to_f32( shr(rgba, 24) ); |
| 65 | } break; |
| 66 | } |
| 67 | }; |
| 68 | |
| 69 | skvm::F32 r,g,b,a; |
| 70 | load(src, srcFmt, &r,&g,&b,&a); |
| 71 | |
| 72 | skvm::F32 dr,dg,db,da; |
| 73 | load(dst, dstFmt, &dr,&dg,&db,&da); |
| 74 | |
// Blend: every channel, alpha included, becomes mad(dst, 1 - src alpha, src).
| 75 | skvm::F32 invA = sub(splat(1.0f), a); |
| 76 | r = mad(dr, invA, r); |
| 77 | g = mad(dg, invA, g); |
| 78 | b = mad(db, invA, b); |
| 79 | a = mad(da, invA, a); |
| 80 | |
// Multiplies by 255, adds 0.5, and converts to an integer.
// NOTE(review): this rounds to nearest only if to_i32 truncates; confirm.
| 81 | auto f32_to_byte = [&](skvm::F32 f32) { |
| 82 | skvm::F32 _255 = splat(255.0f), |
| 83 | _0_5 = splat(0.5f); |
| 84 | return to_i32(mad(f32, _255, _0_5)); |
| 85 | }; |
// Store the blended pixel back to dst in the destination format.
| 86 | switch (dstFmt) { |
| 87 | case A8: { |
| 88 | store8(dst, f32_to_byte(a)); |
| 89 | } break; |
| 90 | |
// Gray destination: store a weighted sum of r, g, b with weights
// 0.2126, 0.7152 and 0.0722.
| 91 | case G8: { |
| 92 | skvm::F32 _2126 = splat(0.2126f), |
| 93 | _7152 = splat(0.7152f), |
| 94 | _0722 = splat(0.0722f); |
| 95 | store8(dst, f32_to_byte(mad(r, _2126, |
| 96 | mad(g, _7152, |
| 97 | mul(b, _0722))))); |
| 98 | } break; |
| 99 | |
// 32-bit destination: shift each converted channel into its byte position and
// OR the four together, one bit_or per statement.
| 100 | case RGBA_8888: { |
| 101 | skvm::I32 R = f32_to_byte(r) , |
| 102 | G = shl(f32_to_byte(g), 8), |
| 103 | B = shl(f32_to_byte(b), 16), |
| 104 | A = shl(f32_to_byte(a), 24); |
| 105 | |
| 106 | R = bit_or(R,G); |
| 107 | R = bit_or(R,B); |
| 108 | R = bit_or(R,A); |
| 109 | |
| 110 | store32(dst, R); |
| 111 | } break; |
| 112 | } |
| 113 | } |
| 114 | }; |
| 115 | |
// Golden text dumps of the programs built by SrcoverBuilder, one raw string
// per (source format, destination format) pair. The SkVM test looks entries up
// as kExpected[3*s+d], where s and d are Fmt values, so the nine entries are
// ordered A8->A8, A8->G8, A8->RGBA_8888, G8->A8, ..., RGBA_8888->RGBA_8888.
// To regenerate, set `train` to true in the test and paste what it prints.
| 116 | static const char* kExpected[] = { |
| 117 | R"(r0 = load8 arg(0) |
| 118 | r1 = splat 3B808081 (0.0039215689) |
| 119 | r0 = to_f32 r0 |
| 120 | r0 = mul_f32 r1 r0 |
| 121 | r2 = load8 arg(1) |
| 122 | r2 = to_f32 r2 |
| 123 | r2 = mul_f32 r1 r2 |
| 124 | r1 = splat 3F800000 (1) |
| 125 | r1 = sub_f32 r1 r0 |
| 126 | r1 = mad_f32 r2 r1 r0 |
| 127 | r2 = splat 437F0000 (255) |
| 128 | r0 = splat 3F000000 (0.5) |
| 129 | r0 = mad_f32 r1 r2 r0 |
| 130 | r0 = to_i32 r0 |
| 131 | store8 arg(1) r0 |
| 132 | )", |
| 133 | R"(r0 = splat 0 (0) |
| 134 | r1 = load8 arg(0) |
| 135 | r2 = splat 3B808081 (0.0039215689) |
| 136 | r1 = to_f32 r1 |
| 137 | r1 = mul_f32 r2 r1 |
| 138 | r3 = load8 arg(1) |
| 139 | r3 = to_f32 r3 |
| 140 | r3 = mul_f32 r2 r3 |
| 141 | r2 = splat 3F800000 (1) |
| 142 | r2 = sub_f32 r2 r1 |
| 143 | r2 = mad_f32 r3 r2 r0 |
| 144 | r3 = splat 3E59B3D0 (0.21259999) |
| 145 | r0 = splat 3F371759 (0.71520001) |
| 146 | r1 = splat 3D93DD98 (0.0722) |
| 147 | r1 = mul_f32 r2 r1 |
| 148 | r1 = mad_f32 r2 r0 r1 |
| 149 | r1 = mad_f32 r2 r3 r1 |
| 150 | r3 = splat 437F0000 (255) |
| 151 | r2 = splat 3F000000 (0.5) |
| 152 | r2 = mad_f32 r1 r3 r2 |
| 153 | r2 = to_i32 r2 |
| 154 | store8 arg(1) r2 |
| 155 | )", |
| 156 | R"(r0 = splat 0 (0) |
| 157 | r1 = load8 arg(0) |
| 158 | r2 = splat 3B808081 (0.0039215689) |
| 159 | r1 = to_f32 r1 |
| 160 | r1 = mul_f32 r2 r1 |
| 161 | r3 = load32 arg(1) |
| 162 | r4 = splat FF (3.5733111e-43) |
| 163 | r5 = bit_and r3 r4 |
| 164 | r5 = to_f32 r5 |
| 165 | r5 = mul_f32 r2 r5 |
| 166 | r6 = shr r3 8 (1.1210388e-44) |
| 167 | r6 = bit_and r6 r4 |
| 168 | r6 = to_f32 r6 |
| 169 | r6 = mul_f32 r2 r6 |
| 170 | r7 = shr r3 10 (2.2420775e-44) |
| 171 | r7 = bit_and r7 r4 |
| 172 | r7 = to_f32 r7 |
| 173 | r7 = mul_f32 r2 r7 |
| 174 | r3 = shr r3 18 (3.3631163e-44) |
| 175 | r3 = to_f32 r3 |
| 176 | r3 = mul_f32 r2 r3 |
| 177 | r2 = splat 3F800000 (1) |
| 178 | r2 = sub_f32 r2 r1 |
| 179 | r5 = mad_f32 r5 r2 r0 |
| 180 | r6 = mad_f32 r6 r2 r0 |
| 181 | r7 = mad_f32 r7 r2 r0 |
| 182 | r2 = mad_f32 r3 r2 r1 |
| 183 | r3 = splat 437F0000 (255) |
| 184 | r1 = splat 3F000000 (0.5) |
| 185 | r5 = mad_f32 r5 r3 r1 |
| 186 | r5 = to_i32 r5 |
| 187 | r6 = mad_f32 r6 r3 r1 |
| 188 | r6 = to_i32 r6 |
| 189 | r6 = shl r6 8 (1.1210388e-44) |
| 190 | r7 = mad_f32 r7 r3 r1 |
| 191 | r7 = to_i32 r7 |
| 192 | r7 = shl r7 10 (2.2420775e-44) |
| 193 | r1 = mad_f32 r2 r3 r1 |
| 194 | r1 = to_i32 r1 |
| 195 | r1 = shl r1 18 (3.3631163e-44) |
| 196 | r6 = bit_or r5 r6 |
| 197 | r6 = bit_or r6 r7 |
| 198 | r6 = bit_or r6 r1 |
| 199 | store32 arg(1) r6 |
| 200 | )", |
| 201 | R"(r0 = splat 3B808081 (0.0039215689) |
| 202 | r1 = splat 3F800000 (1) |
| 203 | r2 = load8 arg(1) |
| 204 | r2 = to_f32 r2 |
| 205 | r2 = mul_f32 r0 r2 |
| 206 | r0 = sub_f32 r1 r1 |
| 207 | r0 = mad_f32 r2 r0 r1 |
| 208 | r2 = splat 437F0000 (255) |
| 209 | r1 = splat 3F000000 (0.5) |
| 210 | r1 = mad_f32 r0 r2 r1 |
| 211 | r1 = to_i32 r1 |
| 212 | store8 arg(1) r1 |
| 213 | )", |
| 214 | R"(r0 = load8 arg(0) |
| 215 | r1 = splat 3B808081 (0.0039215689) |
| 216 | r0 = to_f32 r0 |
| 217 | r0 = mul_f32 r1 r0 |
| 218 | r2 = splat 3F800000 (1) |
| 219 | r3 = load8 arg(1) |
| 220 | r3 = to_f32 r3 |
| 221 | r3 = mul_f32 r1 r3 |
| 222 | r2 = sub_f32 r2 r2 |
| 223 | r2 = mad_f32 r3 r2 r0 |
| 224 | r3 = splat 3E59B3D0 (0.21259999) |
| 225 | r0 = splat 3F371759 (0.71520001) |
| 226 | r1 = splat 3D93DD98 (0.0722) |
| 227 | r1 = mul_f32 r2 r1 |
| 228 | r1 = mad_f32 r2 r0 r1 |
| 229 | r1 = mad_f32 r2 r3 r1 |
| 230 | r3 = splat 437F0000 (255) |
| 231 | r2 = splat 3F000000 (0.5) |
| 232 | r2 = mad_f32 r1 r3 r2 |
| 233 | r2 = to_i32 r2 |
| 234 | store8 arg(1) r2 |
| 235 | )", |
| 236 | R"(r0 = load8 arg(0) |
| 237 | r1 = splat 3B808081 (0.0039215689) |
| 238 | r0 = to_f32 r0 |
| 239 | r0 = mul_f32 r1 r0 |
| 240 | r2 = splat 3F800000 (1) |
| 241 | r3 = load32 arg(1) |
| 242 | r4 = splat FF (3.5733111e-43) |
| 243 | r5 = bit_and r3 r4 |
| 244 | r5 = to_f32 r5 |
| 245 | r5 = mul_f32 r1 r5 |
| 246 | r6 = shr r3 8 (1.1210388e-44) |
| 247 | r6 = bit_and r6 r4 |
| 248 | r6 = to_f32 r6 |
| 249 | r6 = mul_f32 r1 r6 |
| 250 | r7 = shr r3 10 (2.2420775e-44) |
| 251 | r7 = bit_and r7 r4 |
| 252 | r7 = to_f32 r7 |
| 253 | r7 = mul_f32 r1 r7 |
| 254 | r3 = shr r3 18 (3.3631163e-44) |
| 255 | r3 = to_f32 r3 |
| 256 | r3 = mul_f32 r1 r3 |
| 257 | r1 = sub_f32 r2 r2 |
| 258 | r5 = mad_f32 r5 r1 r0 |
| 259 | r6 = mad_f32 r6 r1 r0 |
| 260 | r7 = mad_f32 r7 r1 r0 |
| 261 | r1 = mad_f32 r3 r1 r2 |
| 262 | r3 = splat 437F0000 (255) |
| 263 | r2 = splat 3F000000 (0.5) |
| 264 | r5 = mad_f32 r5 r3 r2 |
| 265 | r5 = to_i32 r5 |
| 266 | r6 = mad_f32 r6 r3 r2 |
| 267 | r6 = to_i32 r6 |
| 268 | r6 = shl r6 8 (1.1210388e-44) |
| 269 | r7 = mad_f32 r7 r3 r2 |
| 270 | r7 = to_i32 r7 |
| 271 | r7 = shl r7 10 (2.2420775e-44) |
| 272 | r2 = mad_f32 r1 r3 r2 |
| 273 | r2 = to_i32 r2 |
| 274 | r2 = shl r2 18 (3.3631163e-44) |
| 275 | r6 = bit_or r5 r6 |
| 276 | r6 = bit_or r6 r7 |
| 277 | r6 = bit_or r6 r2 |
| 278 | store32 arg(1) r6 |
| 279 | )", |
| 280 | R"(r0 = load32 arg(0) |
| 281 | r1 = splat 3B808081 (0.0039215689) |
| 282 | r0 = shr r0 18 (3.3631163e-44) |
| 283 | r0 = to_f32 r0 |
| 284 | r0 = mul_f32 r1 r0 |
| 285 | r2 = load8 arg(1) |
| 286 | r2 = to_f32 r2 |
| 287 | r2 = mul_f32 r1 r2 |
| 288 | r1 = splat 3F800000 (1) |
| 289 | r1 = sub_f32 r1 r0 |
| 290 | r1 = mad_f32 r2 r1 r0 |
| 291 | r2 = splat 437F0000 (255) |
| 292 | r0 = splat 3F000000 (0.5) |
| 293 | r0 = mad_f32 r1 r2 r0 |
| 294 | r0 = to_i32 r0 |
| 295 | store8 arg(1) r0 |
| 296 | )", |
| 297 | R"(r0 = load32 arg(0) |
| 298 | r1 = splat FF (3.5733111e-43) |
| 299 | r2 = bit_and r0 r1 |
| 300 | r3 = splat 3B808081 (0.0039215689) |
| 301 | r2 = to_f32 r2 |
| 302 | r2 = mul_f32 r3 r2 |
| 303 | r4 = shr r0 8 (1.1210388e-44) |
| 304 | r4 = bit_and r4 r1 |
| 305 | r4 = to_f32 r4 |
| 306 | r4 = mul_f32 r3 r4 |
| 307 | r5 = shr r0 10 (2.2420775e-44) |
| 308 | r5 = bit_and r5 r1 |
| 309 | r5 = to_f32 r5 |
| 310 | r5 = mul_f32 r3 r5 |
| 311 | r0 = shr r0 18 (3.3631163e-44) |
| 312 | r0 = to_f32 r0 |
| 313 | r0 = mul_f32 r3 r0 |
| 314 | r1 = load8 arg(1) |
| 315 | r1 = to_f32 r1 |
| 316 | r1 = mul_f32 r3 r1 |
| 317 | r3 = splat 3F800000 (1) |
| 318 | r3 = sub_f32 r3 r0 |
| 319 | r2 = mad_f32 r1 r3 r2 |
| 320 | r4 = mad_f32 r1 r3 r4 |
| 321 | r3 = mad_f32 r1 r3 r5 |
| 322 | r1 = splat 3E59B3D0 (0.21259999) |
| 323 | r5 = splat 3F371759 (0.71520001) |
| 324 | r0 = splat 3D93DD98 (0.0722) |
| 325 | r0 = mul_f32 r3 r0 |
| 326 | r0 = mad_f32 r4 r5 r0 |
| 327 | r0 = mad_f32 r2 r1 r0 |
| 328 | r1 = splat 437F0000 (255) |
| 329 | r2 = splat 3F000000 (0.5) |
| 330 | r2 = mad_f32 r0 r1 r2 |
| 331 | r2 = to_i32 r2 |
| 332 | store8 arg(1) r2 |
| 333 | )", |
| 334 | R"(r0 = load32 arg(0) |
| 335 | r1 = splat FF (3.5733111e-43) |
| 336 | r2 = bit_and r0 r1 |
| 337 | r3 = splat 3B808081 (0.0039215689) |
| 338 | r2 = to_f32 r2 |
| 339 | r2 = mul_f32 r3 r2 |
| 340 | r4 = shr r0 8 (1.1210388e-44) |
| 341 | r4 = bit_and r4 r1 |
| 342 | r4 = to_f32 r4 |
| 343 | r4 = mul_f32 r3 r4 |
| 344 | r5 = shr r0 10 (2.2420775e-44) |
| 345 | r5 = bit_and r5 r1 |
| 346 | r5 = to_f32 r5 |
| 347 | r5 = mul_f32 r3 r5 |
| 348 | r0 = shr r0 18 (3.3631163e-44) |
| 349 | r0 = to_f32 r0 |
| 350 | r0 = mul_f32 r3 r0 |
| 351 | r6 = load32 arg(1) |
| 352 | r7 = bit_and r6 r1 |
| 353 | r7 = to_f32 r7 |
| 354 | r7 = mul_f32 r3 r7 |
| 355 | r8 = shr r6 8 (1.1210388e-44) |
| 356 | r8 = bit_and r8 r1 |
| 357 | r8 = to_f32 r8 |
| 358 | r8 = mul_f32 r3 r8 |
| 359 | r9 = shr r6 10 (2.2420775e-44) |
| 360 | r9 = bit_and r9 r1 |
| 361 | r9 = to_f32 r9 |
| 362 | r9 = mul_f32 r3 r9 |
| 363 | r6 = shr r6 18 (3.3631163e-44) |
| 364 | r6 = to_f32 r6 |
| 365 | r6 = mul_f32 r3 r6 |
| 366 | r3 = splat 3F800000 (1) |
| 367 | r3 = sub_f32 r3 r0 |
| 368 | r7 = mad_f32 r7 r3 r2 |
| 369 | r8 = mad_f32 r8 r3 r4 |
| 370 | r9 = mad_f32 r9 r3 r5 |
| 371 | r3 = mad_f32 r6 r3 r0 |
| 372 | r6 = splat 437F0000 (255) |
| 373 | r0 = splat 3F000000 (0.5) |
| 374 | r7 = mad_f32 r7 r6 r0 |
| 375 | r7 = to_i32 r7 |
| 376 | r8 = mad_f32 r8 r6 r0 |
| 377 | r8 = to_i32 r8 |
| 378 | r8 = shl r8 8 (1.1210388e-44) |
| 379 | r9 = mad_f32 r9 r6 r0 |
| 380 | r9 = to_i32 r9 |
| 381 | r9 = shl r9 10 (2.2420775e-44) |
| 382 | r0 = mad_f32 r3 r6 r0 |
| 383 | r0 = to_i32 r0 |
| 384 | r0 = shl r0 18 (3.3631163e-44) |
| 385 | r8 = bit_or r7 r8 |
| 386 | r8 = bit_or r8 r9 |
| 387 | r8 = bit_or r8 r0 |
| 388 | store32 arg(1) r8 |
| 389 | )", |
| 390 | }; |
| 391 | |
| 392 | DEF_TEST(SkVM, r) { |
| 393 | for (int s = 0; s < 3; s++) |
| 394 | for (int d = 0; d < 3; d++) { |
| 395 | auto srcFmt = (Fmt)s, |
| 396 | dstFmt = (Fmt)d; |
| 397 | skvm::Program program = SrcoverBuilder{srcFmt, dstFmt}.done(); |
| 398 | |
| 399 | SkDynamicMemoryWStream buf; |
| 400 | program.dump(&buf); |
| 401 | sk_sp<SkData> blob = buf.detachAsData(); |
| 402 | |
| 403 | bool train = false; |
| 404 | if (train) { |
| 405 | SkDebugf("R\"(%.*s)\",\n", blob->size(), blob->data()); |
| 406 | } else if (0 != memcmp(kExpected[3*s+d], blob->data(), blob->size())) { |
| 407 | ERRORF(r, "SkVMTest needs retraining.\n"); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | { |
| 412 | skvm::Program program = SrcoverBuilder{RGBA_8888, RGBA_8888}.done(); |
| 413 | |
| 414 | uint32_t src = 0xbb007733, |
| 415 | dst = 0xffaaccee; |
| 416 | SkPMColor want = SkPMSrcOver(src, dst); // 0xff2dad73 |
| 417 | |
| 418 | program.eval(1, &src, &dst); |
| 419 | |
| 420 | // dst is probably 0xff2dad72. |
| 421 | for (int i = 0; i < 4; i++) { |
| 422 | uint8_t d = dst, |
| 423 | w = want; |
| 424 | REPORTER_ASSERT(r, abs(d-w) < 2); |
| 425 | dst >>= 8; |
| 426 | want >>= 8; |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | { |
| 431 | skvm::Program program = SrcoverBuilder{RGBA_8888, G8}.done(); |
| 432 | |
| 433 | uint32_t src = 0xbb007733; |
| 434 | uint8_t dst = 0x42; |
| 435 | SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00), 0xff424242); |
| 436 | |
| 437 | uint8_t want = SkComputeLuminance(SkGetPackedR32(over), |
| 438 | SkGetPackedG32(over), |
| 439 | SkGetPackedB32(over)); |
| 440 | program.eval(1, &src, &dst); |
| 441 | |
| 442 | REPORTER_ASSERT(r, abs(dst-want) < 3); |
| 443 | } |
| 444 | |
| 445 | { |
| 446 | skvm::Program program = SrcoverBuilder{A8, A8}.done(); |
| 447 | |
| 448 | uint8_t src[256], |
| 449 | dst[256]; |
| 450 | for (int i = 0; i < 256; i++) { |
| 451 | src[i] = 255 - i; |
| 452 | dst[i] = i; |
| 453 | } |
| 454 | |
| 455 | program.eval(256, src, dst); |
| 456 | |
| 457 | for (int i = 0; i < 256; i++) { |
| 458 | uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0), |
| 459 | SkPackARGB32( i, 0,0,0))); |
| 460 | REPORTER_ASSERT(r, abs(dst[i]-want) < 2); |
| 461 | } |
| 462 | } |
| 463 | } |