| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright 2016 Google Inc. | 
 | 3 |  * | 
 | 4 |  * Use of this source code is governed by a BSD-style license that can be | 
 | 5 |  * found in the LICENSE file. | 
 | 6 |  */ | 
 | 7 |  | 
 | 8 | #include "Benchmark.h" | 
 | 9 | #include "SkRasterPipeline.h" | 
 | 10 | #include "SkSRGB.h" | 
 | 11 |  | 
 | 12 | static const int N = 1023; | 
 | 13 |  | 
 | 14 | static uint32_t dst[N], | 
 | 15 |                 src[N]; | 
 | 16 | static uint8_t mask[N]; | 
 | 17 |  | 
 | 18 | // We'll build up a somewhat realistic useful pipeline: | 
 | 19 | //   - load srgb src | 
 | 20 | //   - scale src by 8-bit mask | 
 | 21 | //   - load srgb dst | 
 | 22 | //   - src = srcover(dst, src) | 
 | 23 | //   - store src back as srgb | 
 | 24 | // Every stage except for srcover interacts with memory, and so will need _tail variants. | 
 | 25 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 26 | SK_RASTER_STAGE(load_s_srgb) { | 
 | 27 |     auto ptr = (const uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 28 |  | 
 | 29 |     r = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff], | 
 | 30 |               sk_linear_from_srgb[(ptr[1] >>  0) & 0xff], | 
 | 31 |               sk_linear_from_srgb[(ptr[2] >>  0) & 0xff], | 
 | 32 |               sk_linear_from_srgb[(ptr[3] >>  0) & 0xff] }; | 
 | 33 |  | 
 | 34 |     g = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  8) & 0xff], | 
 | 35 |               sk_linear_from_srgb[(ptr[1] >>  8) & 0xff], | 
 | 36 |               sk_linear_from_srgb[(ptr[2] >>  8) & 0xff], | 
 | 37 |               sk_linear_from_srgb[(ptr[3] >>  8) & 0xff] }; | 
 | 38 |  | 
 | 39 |     b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 
 | 40 |               sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 
 | 41 |               sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 
 | 42 |               sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 
 | 43 |  | 
 | 44 |     a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 45 | } | 
 | 46 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 47 | SK_RASTER_STAGE(load_s_srgb_tail) { | 
 | 48 |     auto ptr = (const uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 49 |  | 
 | 50 |     r = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 }; | 
 | 51 |     g = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 }; | 
 | 52 |     b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 
 | 53 |     a = Sk4f{                (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 54 | } | 
 | 55 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 56 | SK_RASTER_STAGE(load_d_srgb) { | 
 | 57 |     auto ptr = (const uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 58 |  | 
 | 59 |     dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff], | 
 | 60 |                sk_linear_from_srgb[(ptr[1] >>  0) & 0xff], | 
 | 61 |                sk_linear_from_srgb[(ptr[2] >>  0) & 0xff], | 
 | 62 |                sk_linear_from_srgb[(ptr[3] >>  0) & 0xff] }; | 
 | 63 |  | 
 | 64 |     dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  8) & 0xff], | 
 | 65 |                sk_linear_from_srgb[(ptr[1] >>  8) & 0xff], | 
 | 66 |                sk_linear_from_srgb[(ptr[2] >>  8) & 0xff], | 
 | 67 |                sk_linear_from_srgb[(ptr[3] >>  8) & 0xff] }; | 
 | 68 |  | 
 | 69 |     db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 
 | 70 |                sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 
 | 71 |                sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 
 | 72 |                sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 
 | 73 |  | 
 | 74 |     da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 75 | } | 
 | 76 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 77 | SK_RASTER_STAGE(load_d_srgb_tail) { | 
 | 78 |     auto ptr = (const uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 79 |  | 
 | 80 |     dr = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 }; | 
 | 81 |     dg = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 }; | 
 | 82 |     db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 
 | 83 |     da = Sk4f{                (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 84 | } | 
 | 85 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 86 | SK_RASTER_STAGE(scale_u8) { | 
 | 87 |     auto ptr = (const uint8_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 88 |  | 
 | 89 |     auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f); | 
 | 90 |     r *= c; | 
 | 91 |     g *= c; | 
 | 92 |     b *= c; | 
 | 93 |     a *= c; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 94 | } | 
 | 95 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 96 | SK_RASTER_STAGE(scale_u8_tail) { | 
 | 97 |     auto ptr = (const uint8_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 98 |  | 
 | 99 |     auto c = *ptr * (1/255.0f); | 
 | 100 |     r *= c; | 
 | 101 |     g *= c; | 
 | 102 |     b *= c; | 
 | 103 |     a *= c; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 104 | } | 
 | 105 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 106 | SK_RASTER_STAGE(srcover) { | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 107 |     auto A = 1.0f - a; | 
 | 108 |     r += dr * A; | 
 | 109 |     g += dg * A; | 
 | 110 |     b += db * A; | 
 | 111 |     a += da * A; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 112 | } | 
 | 113 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 114 | SK_RASTER_STAGE(store_srgb) { | 
 | 115 |     auto ptr = (uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 116 |  | 
| mtklein | 566ea9b | 2016-07-20 12:10:11 -0700 | [diff] [blame] | 117 |     ( sk_linear_to_srgb(r) | 
 | 118 |     | sk_linear_to_srgb(g) << 8 | 
 | 119 |     | sk_linear_to_srgb(b) << 16 | 
 | 120 |     | Sk4f_round(255.0f*a) << 24).store(ptr); | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 121 | } | 
 | 122 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 123 | SK_RASTER_STAGE(store_srgb_tail) { | 
 | 124 |     auto ptr = (uint32_t*)ctx + x; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 125 |  | 
| mtklein | 566ea9b | 2016-07-20 12:10:11 -0700 | [diff] [blame] | 126 |     Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); | 
 | 127 |     rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 128 |  | 
 | 129 |     SkNx_cast<uint8_t>(rgba).store(ptr); | 
 | 130 | } | 
 | 131 |  | 
 | 132 | class SkRasterPipelineBench : public Benchmark { | 
 | 133 | public: | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 134 |     SkRasterPipelineBench(bool fused) : fFused(fused) {} | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 135 |  | 
 | 136 |     bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 137 |     const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fused" | 
 | 138 |                                                      : "SkRasterPipelineBench_pipeline"; } | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 139 |  | 
 | 140 |     void onDraw(int loops, SkCanvas*) override { | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 141 |         while (loops --> 0) { | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 142 |             fFused ? this->runFused() : this->runPipeline(); | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 143 |         } | 
 | 144 |     } | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 145 |  | 
 | 146 |     void runFused() { | 
 | 147 |         Sk4f r,g,b,a, dr,dg,db,da; | 
 | 148 |         size_t x = 0, n = N; | 
 | 149 |         while (n >= 4) { | 
 | 150 |             load_s_srgb(src    , x, r,g,b,a, dr,dg,db,da); | 
 | 151 |             scale_u8   (mask   , x, r,g,b,a, dr,dg,da,da); | 
 | 152 |             load_d_srgb(dst    , x, r,g,b,a, dr,dg,da,da); | 
 | 153 |             srcover    (nullptr, x, r,g,b,a, dr,dg,da,da); | 
 | 154 |             store_srgb (dst    , x, r,g,b,a, dr,dg,da,da); | 
 | 155 |  | 
 | 156 |             x += 4; | 
 | 157 |             n -= 4; | 
 | 158 |         } | 
 | 159 |         while (n > 0) { | 
 | 160 |             load_s_srgb_tail(src    , x, r,g,b,a, dr,dg,db,da); | 
 | 161 |             scale_u8_tail   (mask   , x, r,g,b,a, dr,dg,da,da); | 
 | 162 |             load_d_srgb_tail(dst    , x, r,g,b,a, dr,dg,da,da); | 
 | 163 |             srcover         (nullptr, x, r,g,b,a, dr,dg,da,da); | 
 | 164 |             store_srgb_tail (dst    , x, r,g,b,a, dr,dg,da,da); | 
 | 165 |  | 
 | 166 |             x += 1; | 
 | 167 |             n -= 1; | 
 | 168 |         } | 
 | 169 |     } | 
 | 170 |  | 
 | 171 |     void runPipeline() { | 
 | 172 |         SkRasterPipeline p; | 
 | 173 |         p.append<load_s_srgb, load_s_srgb_tail>( src); | 
 | 174 |         p.append<   scale_u8,    scale_u8_tail>(mask); | 
 | 175 |         p.append<load_d_srgb, load_d_srgb_tail>( dst); | 
 | 176 |         p.append<srcover>(); | 
 | 177 |         p.append< store_srgb,  store_srgb_tail>( dst); | 
 | 178 |  | 
 | 179 |         p.run(N); | 
 | 180 |     } | 
 | 181 |  | 
 | 182 |     bool fFused; | 
| mtklein | 281b33f | 2016-07-12 15:01:26 -0700 | [diff] [blame] | 183 | }; | 
 | 184 |  | 
| mtklein | fe2042e | 2016-07-29 14:27:41 -0700 | [diff] [blame] | 185 | DEF_BENCH( return new SkRasterPipelineBench(true); ) | 
 | 186 | DEF_BENCH( return new SkRasterPipelineBench(false); ) |