/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "bench/Benchmark.h"
9#include "src/core/SkOpts.h"
10#include "src/core/SkVM.h"
11
// N.B. I have not tested that the math performed by these benchmarks is correct.
// They're really more meant to be representative load.  (Wouldn't hurt to be correct though.)
14
15namespace {
16
17 enum Mode {Opts, RP, F32, I32, I32_SWAR};
18 static const char* kMode_name[] = { "Opts", "RP","F32", "I32", "I32_SWAR" };
19
20 struct SrcoverBuilder_F32 : public skvm::Builder {
21 SrcoverBuilder_F32() {
22
23 skvm::Arg src = arg(0),
24 dst = arg(1);
25
26 auto byte_to_f32 = [&](skvm::I32 byte) {
27 return mul(splat(1/255.0f), to_f32(byte));
28 };
29 auto f32_to_byte = [&](skvm::F32 f32) {
30 return to_i32(mad(f32, splat(255.0f), splat(0.5f)));
31 };
32
33 auto load = [&](skvm::Arg ptr,
34 skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
35 skvm::I32 rgba = load32(ptr);
36 *r = byte_to_f32(bit_and( rgba , splat(0xff)));
37 *g = byte_to_f32(bit_and(shr(rgba, 8), splat(0xff)));
38 *b = byte_to_f32(bit_and(shr(rgba, 16), splat(0xff)));
39 *a = byte_to_f32( shr(rgba, 24) );
40 };
41
42 skvm::F32 r,g,b,a;
43 load(src, &r,&g,&b,&a);
44
45 skvm::F32 dr,dg,db,da;
46 load(dst, &dr,&dg,&db,&da);
47
48 skvm::F32 invA = sub(splat(1.0f), a);
49 r = mad(dr, invA, r);
50 g = mad(dg, invA, g);
51 b = mad(db, invA, b);
52 a = mad(da, invA, a);
53
54 store32(dst, bit_or( f32_to_byte(r) ,
55 bit_or(shl(f32_to_byte(g), 8),
56 bit_or(shl(f32_to_byte(b), 16),
57 shl(f32_to_byte(a), 24)))));
58 }
59 };
60
61 struct SrcoverBuilder_I32 : public skvm::Builder {
62 SrcoverBuilder_I32() {
63 skvm::Arg src = arg(0),
64 dst = arg(1);
65
66 auto load = [&](skvm::Arg ptr,
67 skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
68 skvm::I32 rgba = load32(ptr);
69 *r = bit_and( rgba , splat(0xff));
70 *g = bit_and(shr(rgba, 8), splat(0xff));
71 *b = bit_and(shr(rgba, 16), splat(0xff));
72 *a = shr(rgba, 24) ;
73 };
74
75 auto mul_unorm8 = [&](skvm::I32 x, skvm::I32 y) {
76 // (x*y + 127)/255 ~= (x*y+255)/256
77 return shr(add(mul(x, y), splat(0xff)), 8);
78 };
79
80 skvm::I32 r,g,b,a;
81 load(src, &r,&g,&b,&a);
82
83 skvm::I32 dr,dg,db,da;
84 load(dst, &dr,&dg,&db,&da);
85
86 skvm::I32 invA = sub(splat(0xff), a);
87 r = add(r, mul_unorm8(dr, invA));
88 g = add(g, mul_unorm8(dr, invA));
89 b = add(b, mul_unorm8(dr, invA));
90 a = add(a, mul_unorm8(dr, invA));
91
92 store32(dst, bit_or( r ,
93 bit_or(shl(g, 8),
94 bit_or(shl(b, 16),
95 shl(a, 24)))));
96 }
97 };
98
99 struct SrcoverBuilder_I32_SWAR : public skvm::Builder {
100 SrcoverBuilder_I32_SWAR() {
101 skvm::Arg src = arg(0),
102 dst = arg(1);
103
104 auto load = [&](skvm::Arg ptr,
105 skvm::I32* rb, skvm::I32* ga) {
106 skvm::I32 rgba = load32(ptr);
107 *rb = bit_and( rgba, splat(0x00ff00ff));
108 *ga = bit_and(shr(rgba, 8), splat(0x00ff00ff));
109 };
110
111 auto mul_unorm8 = [&](skvm::I32 x, skvm::I32 y) {
112 // As above, assuming x is two SWAR bytes in lanes 0 and 2, and y is a byte.
113 return shr(add(mul(x, y), splat(0x00ff00ff)), 8);
114 };
115
116 skvm::I32 rb, ga;
117 load(src, &rb, &ga);
118
119 skvm::I32 drb, dga;
120 load(dst, &drb, &dga);
121
122 skvm::I32 invA = sub(splat(0xff), shr(ga, 16));
123 rb = add(rb, mul_unorm8(drb, invA));
124 ga = add(ga, mul_unorm8(dga, invA));
125
126 store32(dst, bit_or(rb, shl(ga, 8)));
127 }
128 };
129}
130
131class SkVMBench : public Benchmark {
132public:
133 SkVMBench(int pixels, Mode mode)
134 : fPixels(pixels)
135 , fMode(mode)
136 , fName(SkStringPrintf("SkVM_%d_%s", pixels, kMode_name[mode]))
137 {}
138
139private:
140 const char* onGetName() override { return fName.c_str(); }
141 bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
142
143 void onDelayedSetup() override {
144 this->setUnits(fPixels);
145 fSrc.resize(fPixels, 0x7f123456); // Arbitrary non-opaque non-transparent value.
146 fDst.resize(fPixels, 0xff987654); // Arbitrary value.
147
148 if (fMode == F32 ) { fProgram = SrcoverBuilder_F32 {}.done(); }
149 if (fMode == I32 ) { fProgram = SrcoverBuilder_I32 {}.done(); }
150 if (fMode == I32_SWAR) { fProgram = SrcoverBuilder_I32_SWAR{}.done(); }
151
152 if (fMode == RP) {
153 fSrcCtx = { fSrc.data(), 0 };
154 fDstCtx = { fDst.data(), 0 };
155 fPipeline.append(SkRasterPipeline::load_8888 , &fSrcCtx);
156 fPipeline.append(SkRasterPipeline::load_8888_dst, &fDstCtx);
157 fPipeline.append(SkRasterPipeline::srcover);
158 fPipeline.append(SkRasterPipeline::store_8888, &fDstCtx);
159 }
160 }
161
162 void onDraw(int loops, SkCanvas*) override {
163 while (loops --> 0) {
164 if (fMode == Opts) {
165 SkOpts::blit_row_s32a_opaque(fDst.data(), fSrc.data(), fPixels, 0xff);
166 } else if (fMode == RP) {
167 fPipeline.run(0,0,fPixels,1);
168 } else {
169 fProgram.eval(fPixels, fSrc.data(), fDst.data());
170 }
171 }
172 }
173
174 int fPixels;
175 Mode fMode;
176 SkString fName;
177 std::vector<uint32_t> fSrc,
178 fDst;
179 skvm::Program fProgram;
180
181 SkRasterPipeline_MemoryCtx fSrcCtx,
182 fDstCtx;
183 SkRasterPipeline_<256> fPipeline;
184};
185
// Register each mode at pixel counts from 1 to 4096 so both per-call overhead
// (small N) and steady-state throughput (large N) show up in the results.
DEF_BENCH(return (new SkVMBench{   1, Opts});)
DEF_BENCH(return (new SkVMBench{   4, Opts});)
DEF_BENCH(return (new SkVMBench{  16, Opts});)
DEF_BENCH(return (new SkVMBench{  64, Opts});)
DEF_BENCH(return (new SkVMBench{ 256, Opts});)
DEF_BENCH(return (new SkVMBench{1024, Opts});)
DEF_BENCH(return (new SkVMBench{4096, Opts});)

DEF_BENCH(return (new SkVMBench{   1, RP});)
DEF_BENCH(return (new SkVMBench{   4, RP});)
DEF_BENCH(return (new SkVMBench{  16, RP});)
DEF_BENCH(return (new SkVMBench{  64, RP});)
DEF_BENCH(return (new SkVMBench{ 256, RP});)
DEF_BENCH(return (new SkVMBench{1024, RP});)
DEF_BENCH(return (new SkVMBench{4096, RP});)

DEF_BENCH(return (new SkVMBench{   1, F32});)
DEF_BENCH(return (new SkVMBench{   4, F32});)
DEF_BENCH(return (new SkVMBench{  16, F32});)
DEF_BENCH(return (new SkVMBench{  64, F32});)
DEF_BENCH(return (new SkVMBench{ 256, F32});)
DEF_BENCH(return (new SkVMBench{1024, F32});)
DEF_BENCH(return (new SkVMBench{4096, F32});)

DEF_BENCH(return (new SkVMBench{   1, I32});)
DEF_BENCH(return (new SkVMBench{   4, I32});)
DEF_BENCH(return (new SkVMBench{  16, I32});)
DEF_BENCH(return (new SkVMBench{  64, I32});)
DEF_BENCH(return (new SkVMBench{ 256, I32});)
DEF_BENCH(return (new SkVMBench{1024, I32});)
DEF_BENCH(return (new SkVMBench{4096, I32});)

DEF_BENCH(return (new SkVMBench{   1, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{   4, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{  16, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{  64, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 256, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{1024, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{4096, I32_SWAR});)