blob: f3b52751f311934db96af23b1fb365f1f789655b [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
Julia Lavrova20187a22021-12-21 14:33:35 +000013#include "src/gpu/GrShaderCaps.h"
14#include "src/sksl/SkSLCompiler.h"
15#include "src/sksl/codegen/SkSLVMCodeGenerator.h"
16#include "src/sksl/tracing/SkVMDebugTrace.h"
17#include "src/utils/SkVMVisualizer.h"
Mike Klein68c50d02019-05-29 12:57:54 -050018#include "tests/Test.h"
Mike Klein7e650762019-07-02 15:21:11 -050019
Mike Klein9977efa2019-07-15 12:22:36 -050020template <typename Fn>
Mike Kleinfc017c72021-02-08 10:45:19 -060021static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
22 skvm::Program p = b.done();
23 test(p);
24 if (p.hasJIT()) {
25 test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
Mike Kleinb5a30762019-10-16 10:11:56 -050026 }
Mike Kleinb5a30762019-10-16 10:11:56 -050027}
28
Mike Klein7542ab52020-04-02 08:50:16 -050029DEF_TEST(SkVM_eliminate_dead_code, r) {
30 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -040031 {
Mike Klein00e43df2021-01-08 13:45:42 -060032 skvm::Ptr arg = b.varying<int>();
Mike Klein7542ab52020-04-02 08:50:16 -050033 skvm::I32 l = b.load32(arg);
34 skvm::I32 a = b.add(l, l);
35 b.add(a, b.splat(7));
36 }
Herb Derbyf20400e2020-03-18 16:11:25 -040037
Mike Klein7542ab52020-04-02 08:50:16 -050038 std::vector<skvm::Instruction> program = b.program();
39 REPORTER_ASSERT(r, program.size() == 4);
40
Mike Klein5b701e12020-04-02 10:34:24 -050041 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -050042 REPORTER_ASSERT(r, program.size() == 0);
43}
44
Mike Klein9fdadb92019-07-30 12:30:13 -050045DEF_TEST(SkVM_Pointless, r) {
46 // Let's build a program with no memory arguments.
47 // It should all be pegged as dead code, but we should be able to "run" it.
48 skvm::Builder b;
49 {
50 b.add(b.splat(5.0f),
51 b.splat(4.0f));
52 }
53
Mike Kleinfc017c72021-02-08 10:45:19 -060054 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -050055 for (int N = 0; N < 64; N++) {
56 program.eval(N);
57 }
58 });
59
Mike Kleined9b1f12020-02-06 13:02:32 -060060 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -050061 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -050062 }
63}
64
Mike Klein10fc1e62020-04-13 11:57:05 -050065DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -060066 skvm::Builder b;
67 b.store32(b.varying<int>(), b.splat(42));
68
Mike Kleinfc017c72021-02-08 10:45:19 -060069 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050070 int buf[18];
71 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -060072
Mike Klein10fc1e62020-04-13 11:57:05 -050073 p.eval(17, buf);
74 for (int i = 0; i < 17; i++) {
75 REPORTER_ASSERT(r, buf[i] == 42);
76 }
77 REPORTER_ASSERT(r, buf[17] == 47);
78 });
Mike Kleinb6149312020-02-26 13:04:23 -060079}
Mike Klein11efa182020-02-27 12:04:37 -060080
Mike Klein10fc1e62020-04-13 11:57:05 -050081DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -060082 skvm::Builder b;
83 {
84 auto src = b.varying<int>(),
85 dst = b.varying<int>();
86 b.store32(dst, b.load32(src));
87 }
88
Mike Kleinfc017c72021-02-08 10:45:19 -060089 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050090 int src[] = {1,2,3,4,5,6,7,8,9},
91 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -060092
Mike Klein10fc1e62020-04-13 11:57:05 -050093 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
94 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
95 REPORTER_ASSERT(r, dst[i] == src[i]);
96 }
97 size_t i = SK_ARRAY_COUNT(src)-1;
98 REPORTER_ASSERT(r, dst[i] == 0);
99 });
Mike Klein11efa182020-02-27 12:04:37 -0600100}
Mike Kleinb6149312020-02-26 13:04:23 -0600101
Mike Kleinc7c1f9c2021-02-08 10:24:52 -0600102DEF_TEST(SkVM_allow_jit, r) {
103 skvm::Builder b;
104 {
105 auto src = b.varying<int>(),
106 dst = b.varying<int>();
107 b.store32(dst, b.load32(src));
108 }
109
Herb Derby8516c0d2021-10-18 09:36:45 -0400110 if (b.done("test-allow_jit", /*allow_jit=*/true).hasJIT()) {
Mike Kleinc7c1f9c2021-02-08 10:24:52 -0600111 REPORTER_ASSERT(r, !b.done("", false).hasJIT());
112 }
113}
114
Mike Klein81756e42019-06-12 11:36:28 -0500115DEF_TEST(SkVM_LoopCounts, r) {
116 // Make sure we cover all the exact N we want.
117
Mike Klein9977efa2019-07-15 12:22:36 -0500118 // buf[i] += 1
119 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -0600120 skvm::Ptr arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500121 b.store32(arg,
122 b.add(b.splat(1),
123 b.load32(arg)));
124
Mike Kleinfc017c72021-02-08 10:45:19 -0600125 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500126 int buf[64];
127 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500128 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
129 buf[i] = i;
130 }
131 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500132
Mike Klein9977efa2019-07-15 12:22:36 -0500133 for (int i = 0; i < N; i++) {
134 REPORTER_ASSERT(r, buf[i] == i+1);
135 }
136 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
137 REPORTER_ASSERT(r, buf[i] == i);
138 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500139 }
140 });
Mike Klein81756e42019-06-12 11:36:28 -0500141}
Mike Klein05642042019-06-18 12:16:06 -0500142
Mike Kleinb2b6a992020-01-13 16:34:30 -0600143DEF_TEST(SkVM_gather32, r) {
144 skvm::Builder b;
145 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400146 skvm::UPtr uniforms = b.uniform();
147 skvm::Ptr buf = b.varying<int>();
Mike Kleinb2b6a992020-01-13 16:34:30 -0600148 skvm::I32 x = b.load32(buf);
149 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
150 }
151
Mike Kleinfc017c72021-02-08 10:45:19 -0600152 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600153 const int img[] = {12,34,56,78, 90,98,76,54};
154
155 int buf[20];
156 for (int i = 0; i < 20; i++) {
157 buf[i] = i;
158 }
159
160 struct Uniforms {
161 const int* img;
162 } uniforms{img};
163
164 program.eval(20, &uniforms, buf);
165 int i = 0;
166 REPORTER_ASSERT(r, buf[i] == 12); i++;
167 REPORTER_ASSERT(r, buf[i] == 34); i++;
168 REPORTER_ASSERT(r, buf[i] == 56); i++;
169 REPORTER_ASSERT(r, buf[i] == 78); i++;
170 REPORTER_ASSERT(r, buf[i] == 90); i++;
171 REPORTER_ASSERT(r, buf[i] == 98); i++;
172 REPORTER_ASSERT(r, buf[i] == 76); i++;
173 REPORTER_ASSERT(r, buf[i] == 54); i++;
174
175 REPORTER_ASSERT(r, buf[i] == 12); i++;
176 REPORTER_ASSERT(r, buf[i] == 34); i++;
177 REPORTER_ASSERT(r, buf[i] == 56); i++;
178 REPORTER_ASSERT(r, buf[i] == 78); i++;
179 REPORTER_ASSERT(r, buf[i] == 90); i++;
180 REPORTER_ASSERT(r, buf[i] == 98); i++;
181 REPORTER_ASSERT(r, buf[i] == 76); i++;
182 REPORTER_ASSERT(r, buf[i] == 54); i++;
183
184 REPORTER_ASSERT(r, buf[i] == 12); i++;
185 REPORTER_ASSERT(r, buf[i] == 34); i++;
186 REPORTER_ASSERT(r, buf[i] == 56); i++;
187 REPORTER_ASSERT(r, buf[i] == 78); i++;
188 });
189}
190
Mike Klein81d52672019-07-30 11:11:09 -0500191DEF_TEST(SkVM_gathers, r) {
192 skvm::Builder b;
193 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400194 skvm::UPtr uniforms = b.uniform();
195 skvm::Ptr buf32 = b.varying<int>(),
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600196 buf16 = b.varying<uint16_t>(),
197 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500198
199 skvm::I32 x = b.load32(buf32);
200
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600201 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
202 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
203 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500204 }
205
Mike Kleinfc017c72021-02-08 10:45:19 -0600206 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500207 const int img[] = {12,34,56,78, 90,98,76,54};
208
209 constexpr int N = 20;
210 int buf32[N];
211 uint16_t buf16[N];
212 uint8_t buf8 [N];
213
214 for (int i = 0; i < 20; i++) {
215 buf32[i] = i;
216 }
217
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600218 struct Uniforms {
219 const int* img;
220 } uniforms{img};
221
222 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500223 int i = 0;
224 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
225 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
226 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
227 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
228 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
229 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
230 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
231 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
232
233 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
234 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
235 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
236 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
237 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
238 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
239 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
240 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
241
242 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
243 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
244 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
245 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
246 });
247}
248
Mike Klein21e85eb2020-04-17 13:57:13 -0500249DEF_TEST(SkVM_gathers2, r) {
250 skvm::Builder b;
251 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400252 skvm::UPtr uniforms = b.uniform();
253 skvm::Ptr buf32 = b.varying<int>(),
Mike Klein21e85eb2020-04-17 13:57:13 -0500254 buf16 = b.varying<uint16_t>(),
255 buf8 = b.varying<uint8_t>();
256
257 skvm::I32 x = b.load32(buf32);
258
259 b.store32(buf32, b.gather32(uniforms,0, x));
260 b.store16(buf16, b.gather16(uniforms,0, x));
261 b.store8 (buf8 , b.gather8 (uniforms,0, x));
262 }
263
Mike Kleinfc017c72021-02-08 10:45:19 -0600264 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein21e85eb2020-04-17 13:57:13 -0500265 uint8_t img[256];
266 for (int i = 0; i < 256; i++) {
267 img[i] = i;
268 }
269
270 int buf32[64];
271 uint16_t buf16[64];
272 uint8_t buf8 [64];
273
274 for (int i = 0; i < 64; i++) {
275 buf32[i] = (i*47)&63;
276 buf16[i] = 0;
277 buf8 [i] = 0;
278 }
279
280 struct Uniforms {
281 const uint8_t* img;
282 } uniforms{img};
283
284 program.eval(64, &uniforms, buf32, buf16, buf8);
285
286 for (int i = 0; i < 64; i++) {
287 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
288 }
289
290 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
291 REPORTER_ASSERT(r, buf16[63] == 0x2322);
292
293 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
294 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
295 });
296}
297
Mike Klein81d52672019-07-30 11:11:09 -0500298DEF_TEST(SkVM_bitops, r) {
299 skvm::Builder b;
300 {
Mike Klein00e43df2021-01-08 13:45:42 -0600301 skvm::Ptr ptr = b.varying<int>();
Mike Klein81d52672019-07-30 11:11:09 -0500302
303 skvm::I32 x = b.load32(ptr);
304
Mike Klein4067a942020-04-05 10:25:32 -0500305 x = b.bit_and (x, b.splat(0xf1)); // 0x40
306 x = b.bit_or (x, b.splat(0x80)); // 0xc0
307 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
308 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500309
310 x = b.shl(x, 28); // 0xe000'0000
311 x = b.sra(x, 28); // 0xffff'fffe
312 x = b.shr(x, 1); // 0x7fff'ffff
313
314 b.store32(ptr, x);
315 }
316
Mike Kleinfc017c72021-02-08 10:45:19 -0600317 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500318 int x = 0x42;
319 program.eval(1, &x);
320 REPORTER_ASSERT(r, x == 0x7fff'ffff);
321 });
322}
323
Mike Klein4067a942020-04-05 10:25:32 -0500324DEF_TEST(SkVM_select_is_NaN, r) {
325 skvm::Builder b;
326 {
Mike Klein00e43df2021-01-08 13:45:42 -0600327 skvm::Ptr src = b.varying<float>(),
Mike Klein4067a942020-04-05 10:25:32 -0500328 dst = b.varying<float>();
329
330 skvm::F32 x = b.loadF(src);
331 x = select(is_NaN(x), b.splat(0.0f)
332 , x);
333 b.storeF(dst, x);
334 }
335
336 std::vector<skvm::OptimizedInstruction> program = b.optimize();
337 REPORTER_ASSERT(r, program.size() == 4);
338 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
339 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
340 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
341 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
342
Mike Kleinfc017c72021-02-08 10:45:19 -0600343 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500344 // ±NaN, ±0, ±1, ±inf
345 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
346 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
347 uint32_t dst[SK_ARRAY_COUNT(src)];
348 program.eval(SK_ARRAY_COUNT(src), src, dst);
349
350 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
351 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
352 }
353 });
354}
355
Mike Klein81d52672019-07-30 11:11:09 -0500356DEF_TEST(SkVM_f32, r) {
357 skvm::Builder b;
358 {
Mike Klein00e43df2021-01-08 13:45:42 -0600359 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500360
Mike Reedf5ff4c22020-03-23 14:57:53 -0400361 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500362 y = b.add(x,x), // y = 2x
363 z = b.sub(y,x), // z = 2x-x = x
364 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400365 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500366 }
367
Mike Kleinfc017c72021-02-08 10:45:19 -0600368 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500369 float buf[] = { 1,2,3,4,5,6,7,8,9 };
370 program.eval(SK_ARRAY_COUNT(buf), buf);
371 for (float v : buf) {
372 REPORTER_ASSERT(r, v == 1.0f);
373 }
374 });
375}
376
377DEF_TEST(SkVM_cmp_i32, r) {
378 skvm::Builder b;
379 {
380 skvm::I32 x = b.load32(b.varying<int>());
381
382 auto to_bit = [&](int shift, skvm::I32 mask) {
383 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
384 };
385
386 skvm::I32 m = b.splat(0);
387 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
388 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
389 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
390 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
391 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
392 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
393
394 b.store32(b.varying<int>(), m);
395 }
Mike Kleinfc017c72021-02-08 10:45:19 -0600396 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500397 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
398 int out[SK_ARRAY_COUNT(in)];
399
400 program.eval(SK_ARRAY_COUNT(in), in, out);
401
402 REPORTER_ASSERT(r, out[0] == 0b001111);
403 REPORTER_ASSERT(r, out[1] == 0b001100);
404 REPORTER_ASSERT(r, out[2] == 0b001010);
405 REPORTER_ASSERT(r, out[3] == 0b001010);
406 REPORTER_ASSERT(r, out[4] == 0b000010);
407 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
408 REPORTER_ASSERT(r, out[i] == 0b110010);
409 }
410 });
411}
412
413DEF_TEST(SkVM_cmp_f32, r) {
414 skvm::Builder b;
415 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400416 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500417
418 auto to_bit = [&](int shift, skvm::I32 mask) {
419 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
420 };
421
422 skvm::I32 m = b.splat(0);
423 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
424 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
425 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
426 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
427 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
428 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
429
430 b.store32(b.varying<int>(), m);
431 }
432
Mike Kleinfc017c72021-02-08 10:45:19 -0600433 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500434 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
435 int out[SK_ARRAY_COUNT(in)];
436
437 program.eval(SK_ARRAY_COUNT(in), in, out);
438
439 REPORTER_ASSERT(r, out[0] == 0b001111);
440 REPORTER_ASSERT(r, out[1] == 0b001100);
441 REPORTER_ASSERT(r, out[2] == 0b001010);
442 REPORTER_ASSERT(r, out[3] == 0b001010);
443 REPORTER_ASSERT(r, out[4] == 0b000010);
444 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
445 REPORTER_ASSERT(r, out[i] == 0b110010);
446 }
447 });
448}
449
Mike Klein14548b92020-02-28 14:02:29 -0600450DEF_TEST(SkVM_index, r) {
451 skvm::Builder b;
452 b.store32(b.varying<int>(), b.index());
453
Mike Kleinfc017c72021-02-08 10:45:19 -0600454 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600455 int buf[23];
456 program.eval(SK_ARRAY_COUNT(buf), buf);
457 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
458 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
459 }
460 });
461}
462
Mike Klein4a131192019-07-19 13:56:41 -0500463DEF_TEST(SkVM_mad, r) {
464 // This program is designed to exercise the tricky corners of instruction
465 // and register selection for Op::mad_f32.
466
467 skvm::Builder b;
468 {
Mike Klein00e43df2021-01-08 13:45:42 -0600469 skvm::Ptr arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500470
Mike Kleincac130f2020-09-25 14:47:44 -0500471 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein4a131192019-07-19 13:56:41 -0500472 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
473 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
474 w = b.mad(z,z,y), // w can alias z but not y.
475 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600476 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500477 }
478
Mike Kleinfc017c72021-02-08 10:45:19 -0600479 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500480 int x = 2;
481 program.eval(1, &x);
482 // x = 2
483 // y = 2*2 + 2 = 6
484 // z = 6*6 + 2 = 38
485 // w = 38*38 + 6 = 1450
486 // v = 1450*6 + 1450 = 10150
487 REPORTER_ASSERT(r, x == 10150);
488 });
489}
490
Mike Klein7c0332c2020-03-05 14:18:04 -0600491DEF_TEST(SkVM_fms, r) {
492 // Create a pattern that can be peepholed into an Op::fms_f32.
493 skvm::Builder b;
494 {
Mike Klein00e43df2021-01-08 13:45:42 -0600495 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600496
Mike Kleincac130f2020-09-25 14:47:44 -0500497 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600498 v = b.sub(b.mul(x, b.splat(2.0f)),
499 b.splat(1.0f));
500 b.store32(arg, b.trunc(v));
501 }
502
Mike Kleinfc017c72021-02-08 10:45:19 -0600503 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600504 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
505 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
506
507 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
508 REPORTER_ASSERT(r, buf[i] = 2*i-1);
509 }
510 });
511}
512
513DEF_TEST(SkVM_fnma, r) {
514 // Create a pattern that can be peepholed into an Op::fnma_f32.
515 skvm::Builder b;
516 {
Mike Klein00e43df2021-01-08 13:45:42 -0600517 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600518
Mike Kleincac130f2020-09-25 14:47:44 -0500519 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600520 v = b.sub(b.splat(1.0f),
521 b.mul(x, b.splat(2.0f)));
522 b.store32(arg, b.trunc(v));
523 }
524
Mike Kleinfc017c72021-02-08 10:45:19 -0600525 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600526 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
527 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
528
529 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
530 REPORTER_ASSERT(r, buf[i] = 1-2*i);
531 }
532 });
533}
534
Mike Klein81d52672019-07-30 11:11:09 -0500535DEF_TEST(SkVM_madder, r) {
536 skvm::Builder b;
537 {
Mike Klein00e43df2021-01-08 13:45:42 -0600538 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500539
Mike Reedf5ff4c22020-03-23 14:57:53 -0400540 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500541 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
542 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
543 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400544 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500545 }
546
Mike Kleinfc017c72021-02-08 10:45:19 -0600547 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500548 float x = 2.0f;
549 // y = 2*2 + 2 = 6
550 // z = 6*2 + 6 = 18
551 // w = 6*6 + 18 = 54
552 program.eval(1, &x);
553 REPORTER_ASSERT(r, x == 54.0f);
554 });
555}
556
Mike Kleinf22faaf2020-01-09 07:27:39 -0600557DEF_TEST(SkVM_floor, r) {
558 skvm::Builder b;
559 {
Mike Klein00e43df2021-01-08 13:45:42 -0600560 skvm::Ptr arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400561 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600562 }
563
Mike Kleinfc017c72021-02-08 10:45:19 -0600564 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600565 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
566 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
567 program.eval(SK_ARRAY_COUNT(buf), buf);
568 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
569 REPORTER_ASSERT(r, buf[i] == want[i]);
570 }
571 });
572}
573
Mike Klein5caf7de2020-03-12 11:05:46 -0500574DEF_TEST(SkVM_round, r) {
575 skvm::Builder b;
576 {
Mike Klein00e43df2021-01-08 13:45:42 -0600577 skvm::Ptr src = b.varying<float>();
578 skvm::Ptr dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400579 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500580 }
581
582 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
583 // We haven't explicitly guaranteed that here... it just probably is.
Mike Kleinfc017c72021-02-08 10:45:19 -0600584 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500585 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
586 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
587 int dst[SK_ARRAY_COUNT(buf)];
588
589 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
590 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
591 REPORTER_ASSERT(r, dst[i] == want[i]);
592 }
593 });
594}
595
Herb Derbyc02a41f2020-02-28 14:25:45 -0600596DEF_TEST(SkVM_min, r) {
597 skvm::Builder b;
598 {
Mike Klein00e43df2021-01-08 13:45:42 -0600599 skvm::Ptr src1 = b.varying<float>();
600 skvm::Ptr src2 = b.varying<float>();
601 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600602
Mike Reedf5ff4c22020-03-23 14:57:53 -0400603 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600604 }
605
Mike Kleinfc017c72021-02-08 10:45:19 -0600606 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600607 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
608 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
609 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
610 float d[SK_ARRAY_COUNT(s1)];
611 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
612 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
613 REPORTER_ASSERT(r, d[i] == want[i]);
614 }
615 });
616}
617
618DEF_TEST(SkVM_max, r) {
619 skvm::Builder b;
620 {
Mike Klein00e43df2021-01-08 13:45:42 -0600621 skvm::Ptr src1 = b.varying<float>();
622 skvm::Ptr src2 = b.varying<float>();
623 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600624
Mike Reedf5ff4c22020-03-23 14:57:53 -0400625 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600626 }
627
Mike Kleinfc017c72021-02-08 10:45:19 -0600628 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600629 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
630 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
631 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
632 float d[SK_ARRAY_COUNT(s1)];
633 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
634 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
635 REPORTER_ASSERT(r, d[i] == want[i]);
636 }
637 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600638}
639
Mike Kleinf98d0d32019-07-22 14:30:18 -0500640DEF_TEST(SkVM_hoist, r) {
641 // This program uses enough constants that it will fail to JIT if we hoist them.
642 // The JIT will try again without hoisting, and that'll just need 2 registers.
643 skvm::Builder b;
644 {
Mike Klein00e43df2021-01-08 13:45:42 -0600645 skvm::Ptr arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500646 skvm::I32 x = b.load32(arg);
647 for (int i = 0; i < 32; i++) {
648 x = b.add(x, b.splat(i));
649 }
650 b.store32(arg, x);
651 }
652
Mike Kleinfc017c72021-02-08 10:45:19 -0600653 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500654 int x = 4;
655 program.eval(1, &x);
656 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
657 // x += 496
658 REPORTER_ASSERT(r, x == 500);
659 });
660}
661
Mike Kleinb9944122019-08-02 12:22:39 -0500662DEF_TEST(SkVM_select, r) {
663 skvm::Builder b;
664 {
Mike Klein00e43df2021-01-08 13:45:42 -0600665 skvm::Ptr buf = b.varying<int>();
Mike Kleinb9944122019-08-02 12:22:39 -0500666
667 skvm::I32 x = b.load32(buf);
668
669 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
670
671 b.store32(buf, x);
672 }
673
Mike Kleinfc017c72021-02-08 10:45:19 -0600674 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500675 int buf[] = { 0,1,2,3,4,5,6,7,8 };
676 program.eval(SK_ARRAY_COUNT(buf), buf);
677 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
678 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
679 }
680 });
681}
682
Mike Kleinf471c822021-01-05 13:31:15 -0600683DEF_TEST(SkVM_swap, r) {
684 skvm::Builder b;
685 {
686 // This program is the equivalent of
687 // x = *X
688 // y = *Y
689 // *X = y
690 // *Y = x
691 // One rescheduling of the program based only on data flow of Op arguments is
692 // x = *X
693 // *Y = x
694 // y = *Y
695 // *X = y
696 // but this reordering does not produce the same results and is invalid.
Mike Klein00e43df2021-01-08 13:45:42 -0600697 skvm::Ptr X = b.varying<int>(),
Mike Kleinf471c822021-01-05 13:31:15 -0600698 Y = b.varying<int>();
699
700 skvm::I32 x = b.load32(X),
701 y = b.load32(Y);
702
703 b.store32(X, y);
704 b.store32(Y, x);
705 }
706
Mike Kleinfc017c72021-02-08 10:45:19 -0600707 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf471c822021-01-05 13:31:15 -0600708 int b1[] = { 0,1,2,3 };
709 int b2[] = { 4,5,6,7 };
710 program.eval(SK_ARRAY_COUNT(b1), b1, b2);
711 for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
712 REPORTER_ASSERT(r, b1[i] == 4 + i);
713 REPORTER_ASSERT(r, b2[i] == i);
714 }
715 });
716}
717
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500718DEF_TEST(SkVM_NewOps, r) {
719 // Exercise a somewhat arbitrary set of new ops.
720 skvm::Builder b;
721 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400722 skvm::Ptr buf = b.varying<int16_t>();
723 skvm::UPtr uniforms = b.uniform();
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500724
725 skvm::I32 x = b.load16(buf);
726
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600727 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500728
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600729 x = b.add(x, b.uniform32(uniforms, kPtr+0));
Mike Klein8b16bee2020-11-25 10:54:02 -0600730 x = b.mul(x, b.uniform32(uniforms, kPtr+4));
731 x = b.sub(x, b.uniform32(uniforms, kPtr+8));
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600732
Mike Klein8b16bee2020-11-25 10:54:02 -0600733 skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500734 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
735 x = b.select(b.gt(x, limit ), limit , x);
736
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600737 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500738
739 b.store16(buf, x);
740 }
741
Mike Kleinfc017c72021-02-08 10:45:19 -0600742 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500743 const int N = 31;
744 int16_t buf[N];
745 for (int i = 0; i < N; i++) {
746 buf[i] = i;
747 }
748
749 const int M = 16;
750 uint8_t img[M];
751 for (int i = 0; i < M; i++) {
752 img[i] = i*i;
753 }
754
755 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600756 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500757 int add = 5;
Mike Klein8b16bee2020-11-25 10:54:02 -0600758 int mul = 3;
759 int sub = 18;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500760 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600761 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500762
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600763 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500764
765 for (int i = 0; i < N; i++) {
766 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
767 int x = 3*(i-1);
768
769 // Then that's pinned to the limits of img.
770 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
771 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
772 REPORTER_ASSERT(r, buf[i] == img[x]);
773 }
774 });
775}
776
Herb Derbya37001e2021-07-22 17:34:21 -0400777DEF_TEST(SKVM_array32, r) {
Herb Derbya4953512021-07-23 11:08:25 -0400778
779
780
Herb Derbya37001e2021-07-22 17:34:21 -0400781 skvm::Builder b;
Herb Derbya4953512021-07-23 11:08:25 -0400782 skvm::Uniforms uniforms(b.uniform(), 0);
783 // Take up the first slot, so other uniforms are not at 0 offset.
784 uniforms.push(0);
785 int i[] = {3, 7};
786 skvm::Uniform array = uniforms.pushArray(i);
787 float f[] = {5, 9};
788 skvm::Uniform arrayF = uniforms.pushArrayF(f);
Herb Derbya37001e2021-07-22 17:34:21 -0400789 {
790 skvm::Ptr buf0 = b.varying<int32_t>(),
791 buf1 = b.varying<int32_t>(),
Herb Derbya4953512021-07-23 11:08:25 -0400792 buf2 = b.varying<int32_t>();
Herb Derbya37001e2021-07-22 17:34:21 -0400793
Herb Derbya4953512021-07-23 11:08:25 -0400794 skvm::I32 j = b.array32(array, 0);
795 b.store32(buf0, j);
796 skvm::I32 k = b.array32(array, 1);
797 b.store32(buf1, k);
798
799 skvm::F32 x = b.arrayF(arrayF, 0);
800 skvm::F32 y = b.arrayF(arrayF, 1);
801 b.store32(buf2, b.trunc(b.add(x, y)));
Herb Derbya37001e2021-07-22 17:34:21 -0400802 }
803
804 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbya4953512021-07-23 11:08:25 -0400805 const int K = 10;
806 int32_t buf0[K],
807 buf1[K],
808 buf2[K];
Herb Derbya37001e2021-07-22 17:34:21 -0400809
Herb Derbya4953512021-07-23 11:08:25 -0400810 // reset the i[0] for the two tests.
811 i[0] = 3;
812 f[1] = 9;
813 program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
Herb Derbya37001e2021-07-22 17:34:21 -0400814 for (auto v : buf0) {
815 REPORTER_ASSERT(r, v == 3);
816 }
817 for (auto v : buf1) {
818 REPORTER_ASSERT(r, v == 7);
819 }
Herb Derbya4953512021-07-23 11:08:25 -0400820 for (auto v : buf2) {
821 REPORTER_ASSERT(r, v == 14);
822 }
Herb Derbya37001e2021-07-22 17:34:21 -0400823 i[0] = 4;
Herb Derbya4953512021-07-23 11:08:25 -0400824 f[1] = 10;
825 program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
Herb Derbya37001e2021-07-22 17:34:21 -0400826 for (auto v : buf0) {
827 REPORTER_ASSERT(r, v == 4);
828 }
829 for (auto v : buf1) {
830 REPORTER_ASSERT(r, v == 7);
831 }
Herb Derbya4953512021-07-23 11:08:25 -0400832 for (auto v : buf2) {
833 REPORTER_ASSERT(r, v == 15);
834 }
Herb Derbya37001e2021-07-22 17:34:21 -0400835 });
836}
837
Mike Klein5a8404c2020-02-28 14:24:56 -0600838DEF_TEST(SkVM_sqrt, r) {
839 skvm::Builder b;
840 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400841 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600842
Mike Kleinfc017c72021-02-08 10:45:19 -0600843 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600844 constexpr int K = 17;
845 float buf[K];
846 for (int i = 0; i < K; i++) {
847 buf[i] = (float)(i*i);
848 }
849
850 // x^2 -> x
851 program.eval(K, buf);
852
853 for (int i = 0; i < K; i++) {
854 REPORTER_ASSERT(r, buf[i] == (float)i);
855 }
856 });
857}
858
Mike Klein3f7c8652019-11-07 10:33:56 -0600859DEF_TEST(SkVM_MSAN, r) {
860 // This little memset32() program should be able to JIT, but if we run that
861 // JIT code in an MSAN build, it won't see the writes initialize buf. So
862 // this tests that we're using the interpreter instead.
863 skvm::Builder b;
864 b.store32(b.varying<int>(), b.splat(42));
865
Mike Kleinfc017c72021-02-08 10:45:19 -0600866 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600867 constexpr int K = 17;
868 int buf[K]; // Intentionally uninitialized.
869 program.eval(K, buf);
870 sk_msan_assert_initialized(buf, buf+K);
871 for (int x : buf) {
872 REPORTER_ASSERT(r, x == 42);
873 }
874 });
875}
876
Mike Klein13601172019-11-08 15:01:02 -0600877DEF_TEST(SkVM_assert, r) {
878 skvm::Builder b;
879 b.assert_true(b.lt(b.load32(b.varying<int>()),
880 b.splat(42)));
881
Mike Kleinfc017c72021-02-08 10:45:19 -0600882 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600883 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600884 program.eval(SK_ARRAY_COUNT(buf), buf);
885 });
886}
887
John Stiles15384b12021-11-05 14:50:33 -0400888DEF_TEST(SkVM_trace_line, r) {
John Stilesa97bd9d2021-11-18 09:21:03 -0500889 class TestTraceHook : public skvm::TraceHook {
890 public:
891 void var(int, int32_t) override { fBuffer.push_back(-9999999); }
John Stileseeb5b222021-11-29 10:07:22 -0500892 void enter(int) override { fBuffer.push_back(-9999999); }
893 void exit(int) override { fBuffer.push_back(-9999999); }
John Stiles09adf0b2021-12-14 14:52:01 -0500894 void scope(int) override { fBuffer.push_back(-9999999); }
John Stilesa97bd9d2021-11-18 09:21:03 -0500895 void line(int lineNum) override { fBuffer.push_back(lineNum); }
896
897 std::vector<int> fBuffer;
898 };
899
John Stiles15384b12021-11-05 14:50:33 -0400900 skvm::Builder b;
John Stilesa97bd9d2021-11-18 09:21:03 -0500901 TestTraceHook testTrace;
John Stilesd4713ad2021-12-03 11:12:29 -0500902 int traceHookID = b.attachTraceHook(&testTrace);
903 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 123);
904 b.trace_line(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 456);
905 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 567);
906 b.trace_line(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 678);
907 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 789);
908 skvm::Program p = b.done();
John Stilesa97bd9d2021-11-18 09:21:03 -0500909 p.eval(1);
John Stiles15384b12021-11-05 14:50:33 -0400910
John Stilesa97bd9d2021-11-18 09:21:03 -0500911 REPORTER_ASSERT(r, (testTrace.fBuffer == std::vector<int>{123, 789}));
912}
913
914DEF_TEST(SkVM_trace_var, r) {
915 class TestTraceHook : public skvm::TraceHook {
916 public:
917 void line(int) override { fBuffer.push_back(-9999999); }
John Stileseeb5b222021-11-29 10:07:22 -0500918 void enter(int) override { fBuffer.push_back(-9999999); }
919 void exit(int) override { fBuffer.push_back(-9999999); }
John Stiles09adf0b2021-12-14 14:52:01 -0500920 void scope(int) override { fBuffer.push_back(-9999999); }
John Stilesa97bd9d2021-11-18 09:21:03 -0500921 void var(int slot, int32_t val) override {
922 fBuffer.push_back(slot);
923 fBuffer.push_back(val);
924 }
925
926 std::vector<int> fBuffer;
927 };
928
929 skvm::Builder b;
John Stilesa97bd9d2021-11-18 09:21:03 -0500930 TestTraceHook testTrace;
John Stilesd4713ad2021-12-03 11:12:29 -0500931 int traceHookID = b.attachTraceHook(&testTrace);
932 b.trace_var(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 2, b.splat(333));
933 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 4, b.splat(555));
934 b.trace_var(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 5, b.splat(666));
935 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 6, b.splat(777));
936 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 8, b.splat(999));
937 skvm::Program p = b.done();
John Stilesa97bd9d2021-11-18 09:21:03 -0500938 p.eval(1);
939
940 REPORTER_ASSERT(r, (testTrace.fBuffer == std::vector<int>{4, 555, 6, 777}));
941}
942
John Stileseeb5b222021-11-29 10:07:22 -0500943DEF_TEST(SkVM_trace_enter_exit, r) {
John Stilesa97bd9d2021-11-18 09:21:03 -0500944 class TestTraceHook : public skvm::TraceHook {
945 public:
946 void line(int) override { fBuffer.push_back(-9999999); }
947 void var(int, int32_t) override { fBuffer.push_back(-9999999); }
John Stiles09adf0b2021-12-14 14:52:01 -0500948 void scope(int) override { fBuffer.push_back(-9999999); }
John Stileseeb5b222021-11-29 10:07:22 -0500949 void enter(int fnIdx) override {
John Stilesa97bd9d2021-11-18 09:21:03 -0500950 fBuffer.push_back(fnIdx);
John Stileseeb5b222021-11-29 10:07:22 -0500951 fBuffer.push_back(1);
952 }
953 void exit(int fnIdx) override {
954 fBuffer.push_back(fnIdx);
955 fBuffer.push_back(0);
John Stilesa97bd9d2021-11-18 09:21:03 -0500956 }
957
958 std::vector<int> fBuffer;
959 };
960
961 skvm::Builder b;
John Stilesa97bd9d2021-11-18 09:21:03 -0500962 TestTraceHook testTrace;
John Stilesd4713ad2021-12-03 11:12:29 -0500963 int traceHookID = b.attachTraceHook(&testTrace);
964 b.trace_enter(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 99);
965 b.trace_enter(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 12);
966 b.trace_enter(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 34);
967 b.trace_exit(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 56);
968 b.trace_exit(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 78);
969 b.trace_exit(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 90);
970 skvm::Program p = b.done();
John Stilesa97bd9d2021-11-18 09:21:03 -0500971 p.eval(1);
972
973 REPORTER_ASSERT(r, (testTrace.fBuffer == std::vector<int>{12, 1, 56, 0}));
John Stiles15384b12021-11-05 14:50:33 -0400974}
975
John Stiles09adf0b2021-12-14 14:52:01 -0500976DEF_TEST(SkVM_trace_scope, r) {
977 class TestTraceHook : public skvm::TraceHook {
978 public:
979 void var(int, int32_t) override { fBuffer.push_back(-9999999); }
980 void enter(int) override { fBuffer.push_back(-9999999); }
981 void exit(int) override { fBuffer.push_back(-9999999); }
982 void line(int) override { fBuffer.push_back(-9999999); }
983 void scope(int delta) override { fBuffer.push_back(delta); }
984
985 std::vector<int> fBuffer;
986 };
987
988 skvm::Builder b;
989 TestTraceHook testTrace;
990 int traceHookID = b.attachTraceHook(&testTrace);
991 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 1);
992 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), -2);
993 b.trace_scope(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 3);
994 b.trace_scope(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 4);
995 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), -5);
996 skvm::Program p = b.done();
997 p.eval(1);
998
999 REPORTER_ASSERT(r, (testTrace.fBuffer == std::vector<int>{1, -5}));
1000}
1001
John Stilesd4713ad2021-12-03 11:12:29 -05001002DEF_TEST(SkVM_trace_multiple_hooks, r) {
1003 class TestTraceHook : public skvm::TraceHook {
1004 public:
1005 void var(int, int32_t) override { fBuffer.push_back(-9999999); }
1006 void enter(int) override { fBuffer.push_back(-9999999); }
1007 void exit(int) override { fBuffer.push_back(-9999999); }
John Stiles09adf0b2021-12-14 14:52:01 -05001008 void scope(int) override { fBuffer.push_back(-9999999); }
John Stilesd4713ad2021-12-03 11:12:29 -05001009 void line(int lineNum) override { fBuffer.push_back(lineNum); }
1010
1011 std::vector<int> fBuffer;
1012 };
1013
1014 skvm::Builder b;
1015 TestTraceHook testTraceA, testTraceB, testTraceC;
1016 int traceHookAID = b.attachTraceHook(&testTraceA);
1017 int traceHookBID = b.attachTraceHook(&testTraceB);
1018 int traceHookCID = b.attachTraceHook(&testTraceC);
1019 b.trace_line(traceHookCID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 111);
1020 b.trace_line(traceHookAID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 222);
1021 b.trace_line(traceHookCID, b.splat(0x00000000), b.splat(0x00000000), 333);
1022 b.trace_line(traceHookBID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 444);
1023 b.trace_line(traceHookAID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 555);
1024 b.trace_line(traceHookBID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 666);
1025 skvm::Program p = b.done();
1026 p.eval(1);
1027
1028 REPORTER_ASSERT(r, (testTraceA.fBuffer == std::vector<int>{222}));
1029 REPORTER_ASSERT(r, (testTraceB.fBuffer == std::vector<int>{666}));
1030 REPORTER_ASSERT(r, (testTraceC.fBuffer == std::vector<int>{111}));
1031}
1032
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001033DEF_TEST(SkVM_premul, reporter) {
1034 // Test that premul is short-circuited when alpha is known opaque.
1035 {
1036 skvm::Builder p;
1037 auto rptr = p.varying<int>(),
1038 aptr = p.varying<int>();
1039
Mike Reedf5ff4c22020-03-23 14:57:53 -04001040 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001041 g = p.splat(0.0f),
1042 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001043 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001044
1045 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001046 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001047
1048 // load red, load alpha, red *= alpha, store red
1049 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
1050 }
1051
1052 {
1053 skvm::Builder p;
1054 auto rptr = p.varying<int>();
1055
Mike Reedf5ff4c22020-03-23 14:57:53 -04001056 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001057 g = p.splat(0.0f),
1058 b = p.splat(0.0f),
1059 a = p.splat(1.0f);
1060
1061 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001062 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001063
1064 // load red, store red
1065 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1066 }
1067
1068 // Same deal for unpremul.
1069 {
1070 skvm::Builder p;
1071 auto rptr = p.varying<int>(),
1072 aptr = p.varying<int>();
1073
Mike Reedf5ff4c22020-03-23 14:57:53 -04001074 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001075 g = p.splat(0.0f),
1076 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001077 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001078
1079 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001080 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001081
1082 // load red, load alpha, a bunch of unpremul instructions, store red
1083 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1084 }
1085
1086 {
1087 skvm::Builder p;
1088 auto rptr = p.varying<int>();
1089
Mike Reedf5ff4c22020-03-23 14:57:53 -04001090 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001091 g = p.splat(0.0f),
1092 b = p.splat(0.0f),
1093 a = p.splat(1.0f);
1094
1095 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001096 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001097
1098 // load red, store red
1099 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1100 }
1101}
Mike Klein05642042019-06-18 12:16:06 -05001102
Mike Klein05642042019-06-18 12:16:06 -05001103template <typename Fn>
1104static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001105 uint8_t buf[4096];
1106 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001107 fn(a);
1108
1109 REPORTER_ASSERT(r, a.size() == expected.size());
1110
Mike Klein88c0a902019-06-24 15:34:02 -04001111 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001112 want = expected.begin();
1113 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001114 REPORTER_ASSERT(r, got[i] == want[i],
1115 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001116 }
1117}
1118
1119DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001120 // Easiest way to generate test cases is
1121 //
1122 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1123 //
1124 // The -x86-asm-syntax=intel bit is optional, controlling the
1125 // input syntax only; the output will always be AT&T op x,y,dst style.
1126 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1127 // that a bit easier to use here, despite maybe favoring AT&T overall.
1128
1129 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001130 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001131 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001132 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001133 a.vzeroupper();
1134 a.ret();
1135 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001136 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001137 0xc5, 0xf8, 0x77,
1138 0xc3,
1139 });
1140
Mike Klein237dbb42019-07-19 09:44:47 -05001141 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001142 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001143 a.ret();
1144 a.align(4);
1145 },{
1146 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001147 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001148 });
Mike Klein61703a62019-06-18 15:01:12 -05001149
Mike Klein397fc882019-06-20 11:37:10 -05001150 test_asm(r, [&](A& a) {
1151 a.add(A::rax, 8); // Always good to test rax.
1152 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001153
Mike Klein397fc882019-06-20 11:37:10 -05001154 a.add(A::rdi, 12); // Last 0x48 REX
1155 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001156
Mike Klein86a645c2019-07-12 12:29:39 -05001157 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001158 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001159
Mike Klein397fc882019-06-20 11:37:10 -05001160 a.add(A::rsi, 128); // Requires 4 byte immediate.
1161 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001162
1163 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1164 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1165 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
Mike Klein68d075e2020-07-28 09:26:51 -05001166 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
Mike Kleinc15c9362020-04-16 11:10:36 -05001167 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
Mike Klein68d075e2020-07-28 09:26:51 -05001168 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
1169 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
Mike Kleinc15c9362020-04-16 11:10:36 -05001170 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1171 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1172 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1173
1174 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1175
1176 a.add( A::rax , A::rcx); // addq %rcx, %rax
1177 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1178 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1179 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1180
1181 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001182 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001183 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001184 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001185
1186 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001187 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001188
Mike Klein86a645c2019-07-12 12:29:39 -05001189 0x49, 0x83, 0b11'000'000, 0x07,
1190 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001191
1192 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001193 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001194
1195 0x48,0x83,0x06,0x07,
1196 0x48,0x83,0x46,0x0c,0x07,
1197 0x48,0x83,0x44,0x24,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001198 0x49,0x83,0x44,0x24,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001199 0x48,0x83,0x44,0x84,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001200 0x49,0x83,0x44,0x84,0x0c,0x07,
1201 0x4a,0x83,0x44,0xa0,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001202 0x4b,0x83,0x44,0x43,0x0c,0x07,
1203 0x49,0x83,0x44,0x03,0x0c,0x07,
1204 0x4a,0x83,0x44,0x18,0x0c,0x07,
1205
1206 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1207
1208 0x48,0x01,0xc8,
1209 0x48,0x01,0x08,
1210 0x48,0x01,0x48,0x0c,
1211 0x48,0x03,0x48,0x0c,
1212 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001213 });
Mike Klein397fc882019-06-20 11:37:10 -05001214
1215
1216 test_asm(r, [&](A& a) {
1217 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1218 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1219 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1220 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1221 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1222 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1223 },{
1224 /* VEX */ /*op*/ /*modRM*/
1225 0xc5, 0xf5, 0xfe, 0xc2,
1226 0xc5, 0x75, 0xfe, 0xc2,
1227 0xc5, 0xbd, 0xfe, 0xc2,
1228 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1229 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1230 0xc5, 0xf5, 0xfa, 0xc2,
1231 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001232
1233 test_asm(r, [&](A& a) {
Mike Klein84dd8f92020-09-15 07:57:27 -05001234 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1235 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1236 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1237 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1238
1239 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1240 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1241 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1242 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1243
1244 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1245 a.vpabsw (A::ymm4, A::ymm3);
1246 a.vpsllw (A::ymm4, A::ymm3, 12);
1247 a.vpsraw (A::ymm4, A::ymm3, 12);
1248 },{
1249 0xc5, 0xe5, 0xfd, 0xe2,
1250 0xc5, 0xe5, 0xe3, 0xe2,
1251 0xc5, 0xe5, 0x75, 0xe2,
1252 0xc5, 0xe5, 0x65, 0xe2,
1253
1254 0xc5, 0xe5, 0xea, 0xe2,
1255 0xc5, 0xe5, 0xee, 0xe2,
1256 0xc4,0xe2,0x65, 0x3a, 0xe2,
1257 0xc4,0xe2,0x65, 0x3e, 0xe2,
1258
1259 0xc4,0xe2,0x65, 0x0b, 0xe2,
1260 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1261 0xc5,0xdd,0x71, 0xf3, 0x0c,
1262 0xc5,0xdd,0x71, 0xe3, 0x0c,
1263 });
1264
1265 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001266 A::Label l;
1267 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001268 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1269 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1270 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1271 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1272 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1273 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001274 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001275 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001276 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001277 0xc5,0xf5,0x76,0xc2,
1278 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001279 0xc5,0xf4,0xc2,0xc2,0x00,
1280 0xc5,0xf4,0xc2,0xc2,0x01,
1281 0xc5,0xf4,0xc2,0xc2,0x02,
1282 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001283 });
1284
1285 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001286 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1287 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1288 },{
1289 0xc5,0xf4,0x5d,0xc2,
1290 0xc5,0xf4,0x5f,0xc2,
1291 });
1292
1293 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001294 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1295 },{
1296 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1297 });
1298
1299 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001300 a.vpsrld(A::ymm15, A::ymm2, 8);
1301 a.vpsrld(A::ymm0 , A::ymm8, 5);
1302 },{
1303 0xc5, 0x85, 0x72,0xd2, 0x08,
1304 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1305 });
1306
1307 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001308 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001309 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001310 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001311 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001312 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001313 },{
Mike Klein184f6012020-07-22 13:17:29 -05001314 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001315 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001316 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1317 });
Mike Kleine5053412019-06-21 12:37:22 -05001318
1319 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001320 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1321 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1322 },{
1323 0xc5,0xed,0x62,0x0f,
1324 0xc5,0xed,0x6a,0xcb,
1325 });
1326
1327 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001328 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1329 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1330 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1331 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1332 },{
1333 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1334 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1335 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1336 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1337 });
1338
1339 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001340 A::Label l;
1341 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001342 a.byte(1);
1343 a.byte(2);
1344 a.byte(3);
1345 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001346
Mike Klein65c10b52019-07-12 09:22:21 -05001347 a.vbroadcastss(A::ymm0 , &l);
1348 a.vbroadcastss(A::ymm1 , &l);
1349 a.vbroadcastss(A::ymm8 , &l);
1350 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001351
Mike Klein65c10b52019-07-12 09:22:21 -05001352 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001353 a.vpaddd (A::ymm4, A::ymm3, &l);
1354 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001355
1356 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001357
1358 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001359 },{
1360 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001361
Mike Kleine5053412019-06-21 12:37:22 -05001362 /* VEX */ /*op*/ /* ModRM */ /* offset */
1363 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1364 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1365 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1366 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001367
1368 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001369
1370 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1371 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001372
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001373 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1374
1375 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001376 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001377
1378 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001379 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1380 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1381 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1382 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001383
1384 a.vbroadcastss(A::ymm8, A::xmm0);
1385 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001386 },{
1387 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1388 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1389 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1390 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1391 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001392
1393 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1394 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001395 });
1396
1397 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001398 A::Label l;
1399 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001400 a.jne(&l);
1401 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001402 a.je (&l);
1403 a.jmp(&l);
1404 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001405 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001406
Mike Kleinc15c9362020-04-16 11:10:36 -05001407 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001408 a.cmp(A::rax, 12);
1409 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001410 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001411 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1412 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1413 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1414 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1415 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001416 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001417
Mike Kleinc15c9362020-04-16 11:10:36 -05001418 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001419 0x48,0x83,0xf8,0x0c,
1420 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001421 });
Mike Klein120d9e82019-06-21 15:52:55 -05001422
1423 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001424 a.vmovups(A::ymm5, A::Mem{A::rsi});
1425 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001426
Mike Klein400ba222020-06-30 15:54:19 -05001427 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001428 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001429
Mike Kleinedc2dac2020-04-15 16:18:27 -05001430 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1431 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001432
Mike Klein8390f2e2020-04-15 17:03:08 -05001433 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001434 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001435 /* VEX */ /*Op*/ /* ModRM */
1436 0xc5, 0xfc, 0x10, 0b00'101'110,
1437 0xc5, 0xfc, 0x11, 0b00'101'110,
1438
Mike Klein400ba222020-06-30 15:54:19 -05001439 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001440 0xc5, 0xf8, 0x11, 0b00'101'110,
1441
Mike Klein52010b72019-08-02 11:18:00 -05001442 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001443 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001444
1445 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001446 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001447
1448 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001449 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1450 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1451 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001452
Mike Kleinedc2dac2020-04-15 16:18:27 -05001453 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1454 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1455 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001456 },{
1457 0xc5,0xfc,0x10,0x2c,0x24,
1458 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1459 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1460
1461 0xc5,0xfc,0x11,0x2c,0x24,
1462 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1463 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1464 });
1465
1466 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001467 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1468 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1469 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1470 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1471 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001472
Mike Kleinc15c9362020-04-16 11:10:36 -05001473 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1474 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1475 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1476 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1477 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001478
Mike Klein8390f2e2020-04-15 17:03:08 -05001479 a.vmovd(A::Mem{A::rax}, A::xmm0);
1480 a.vmovd(A::Mem{A::rax}, A::xmm8);
1481 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1482
1483 a.vmovd(A::xmm0, A::Mem{A::rax});
1484 a.vmovd(A::xmm8, A::Mem{A::rax});
1485 a.vmovd(A::xmm0, A::Mem{A::r8 });
1486
1487 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1488 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1489 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1490
Mike Klein35b97c32019-07-12 12:32:45 -05001491 a.vmovd(A::rax, A::xmm0);
1492 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001493 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001494
1495 a.vmovd(A::xmm0, A::rax);
1496 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001497 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001498
Mike Kleinc15c9362020-04-16 11:10:36 -05001499 a.movb(A::Mem{A::rdx}, A::rax);
1500 a.movb(A::Mem{A::rdx}, A::r8 );
1501 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001502
Mike Kleinc15c9362020-04-16 11:10:36 -05001503 a.movb(A::rdx, A::Mem{A::rax});
1504 a.movb(A::rdx, A::Mem{A::r8 });
1505 a.movb(A::r8 , A::Mem{A::rax});
1506
1507 a.movb(A::rdx, 12);
1508 a.movb(A::rax, 4);
1509 a.movb(A::r8 , -1);
1510
1511 a.movb(A::Mem{A::rdx}, 12);
1512 a.movb(A::Mem{A::rax}, 4);
1513 a.movb(A::Mem{A::r8 }, -1);
1514 },{
1515 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1516 0x49,0x0f,0xb6,0x00,
1517 0x4c,0x0f,0xb6,0x06,
1518 0x4c,0x0f,0xb6,0x46, 12,
1519 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1520
1521 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1522 0x49,0x0f,0xb7,0x00,
1523 0x4c,0x0f,0xb7,0x06,
1524 0x4c,0x0f,0xb7,0x46, 12,
1525 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001526
Mike Klein35b97c32019-07-12 12:32:45 -05001527 0xc5,0xf9,0x7e,0x00,
1528 0xc5,0x79,0x7e,0x00,
1529 0xc4,0xc1,0x79,0x7e,0x00,
1530
1531 0xc5,0xf9,0x6e,0x00,
1532 0xc5,0x79,0x6e,0x00,
1533 0xc4,0xc1,0x79,0x6e,0x00,
1534
Mike Klein93d3fab2020-01-14 10:46:44 -06001535 0xc5,0xf9,0x6e,0x04,0x88,
1536 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1537 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1538
Mike Klein35b97c32019-07-12 12:32:45 -05001539 0xc5,0xf9,0x7e,0xc0,
1540 0xc5,0x79,0x7e,0xc0,
1541 0xc4,0xc1,0x79,0x7e,0xc0,
1542
1543 0xc5,0xf9,0x6e,0xc0,
1544 0xc5,0x79,0x6e,0xc0,
1545 0xc4,0xc1,0x79,0x6e,0xc0,
1546
Mike Kleinc15c9362020-04-16 11:10:36 -05001547 0x48 ,0x88, 0x02,
1548 0x4c, 0x88, 0x02,
1549 0x49, 0x88, 0x00,
1550
1551 0x48 ,0x8a, 0x10,
1552 0x49, 0x8a, 0x10,
1553 0x4c, 0x8a, 0x00,
1554
1555 0x48, 0xc6, 0xc2, 0x0c,
1556 0x48, 0xc6, 0xc0, 0x04,
1557 0x49, 0xc6, 0xc0, 0xff,
1558
1559 0x48, 0xc6, 0x02, 0x0c,
1560 0x48, 0xc6, 0x00, 0x04,
1561 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001562 });
1563
1564 test_asm(r, [&](A& a) {
Mike Klein4ecc9702020-07-30 10:03:10 -05001565 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1566 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8;
1567
Mike Klein8390f2e2020-04-15 17:03:08 -05001568 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1569 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001570
Mike Klein8390f2e2020-04-15 17:03:08 -05001571 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
Mike Klein4ecc9702020-07-30 10:03:10 -05001572 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001573
Mike Klein21e85eb2020-04-17 13:57:13 -05001574 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1575 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1576
1577 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1578 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1579
Mike Klein8390f2e2020-04-15 17:03:08 -05001580 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1581 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001582
Mike Klein8390f2e2020-04-15 17:03:08 -05001583 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1584 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001585 },{
Mike Klein4ecc9702020-07-30 10:03:10 -05001586 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1587 0xc4,0x43,0x71, 0x22, 0x00, 3,
1588
Mike Klein52010b72019-08-02 11:18:00 -05001589 0xc5,0xb9, 0xc4, 0x0e, 4,
1590 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1591
Mike Klein35b97c32019-07-12 12:32:45 -05001592 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1593 0xc4,0x43,0x71, 0x20, 0x00, 12,
1594
Mike Klein21e85eb2020-04-17 13:57:13 -05001595 0xc4,0x63,0x7d,0x39,0xc1, 1,
1596 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1597
1598 0xc4,0x63,0x79,0x16,0x06, 3,
1599 0xc4,0xc3,0x79,0x16,0x08, 2,
1600
Mike Klein95529e82019-08-02 11:43:43 -05001601 0xc4,0x63,0x79, 0x15, 0x06, 7,
1602 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1603
Mike Klein35b97c32019-07-12 12:32:45 -05001604 0xc4,0x63,0x79, 0x14, 0x06, 7,
1605 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1606 });
1607
1608 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001609 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1610 },{
1611 0xc5, 0x9d, 0xdf, 0xda,
1612 });
Mike Klein9f4df802019-06-24 18:47:16 -04001613
Mike Kleind4546d62019-07-30 12:15:40 -05001614 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001615 A::Label l;
1616 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1617
1618 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1619 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1620 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1621
1622 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1623 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1624
1625 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1626 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1627 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1628 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1629 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1630
1631 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1632 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1633 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1634
Mike Kleind4546d62019-07-30 12:15:40 -05001635 a.vcvttps2dq(A::ymm3, A::ymm2);
1636 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001637 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001638 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001639 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001640 },{
1641 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001642
1643 0xc5,0xfd,0x6f,0x1e,
1644 0xc5,0xfd,0x6f,0x1c,0x24,
1645 0xc4,0xc1,0x7d,0x6f,0x1b,
1646
1647 0xc5,0xfd,0x6f,0x5e,0x04,
1648 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1649
1650 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1651 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1652 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1653 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1654 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1655
1656 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1657 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1658
1659 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1660
Mike Kleind4546d62019-07-30 12:15:40 -05001661 0xc5,0xfe,0x5b,0xda,
1662 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001663 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001664 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001665 });
1666
Mike Kleinbeaa1082020-01-13 14:04:18 -06001667 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001668 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1669 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1670
1671 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1672 a.vcvtph2ps(A::ymm2, A::xmm3);
1673 },{
1674 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1675 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1676
1677 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1678 0xc4,0xe2,0x7d,0x13,0xd3,
1679 });
1680
1681 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001682 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1683 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1684 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1685 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1686 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1687 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1688 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1689 },{
1690 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1691 0xc4,0xe2,0x75,0x92,0x04,0x10,
1692 0xc4,0x62,0x75,0x92,0x14,0x10,
1693 0xc4,0xa2,0x75,0x92,0x04,0x20,
1694 0xc4,0xc2,0x75,0x92,0x04,0x11,
1695 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1696 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1697 });
1698
Mike Kleinc322f632020-01-13 16:18:58 -06001699 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001700 a.mov(A::rax, A::Mem{A::rdi, 0});
1701 a.mov(A::rax, A::Mem{A::rdi, 1});
1702 a.mov(A::rax, A::Mem{A::rdi, 512});
1703 a.mov(A::r15, A::Mem{A::r13, 42});
1704 a.mov(A::rax, A::Mem{A::r13, 42});
1705 a.mov(A::r15, A::Mem{A::rax, 42});
1706 a.mov(A::rax, 1);
1707 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001708 },{
1709 0x48, 0x8b, 0x07,
1710 0x48, 0x8b, 0x47, 0x01,
1711 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1712 0x4d, 0x8b, 0x7d, 0x2a,
1713 0x49, 0x8b, 0x45, 0x2a,
1714 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001715 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1716 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001717 });
1718
Mike Klein9f4df802019-06-24 18:47:16 -04001719 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1720
1721 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001722 a.and16b(A::v4, A::v3, A::v1);
1723 a.orr16b(A::v4, A::v3, A::v1);
1724 a.eor16b(A::v4, A::v3, A::v1);
1725 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001726 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001727 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001728
1729 a.add4s(A::v4, A::v3, A::v1);
1730 a.sub4s(A::v4, A::v3, A::v1);
1731 a.mul4s(A::v4, A::v3, A::v1);
1732
Mike Klein97afd2e2019-10-16 14:11:27 -05001733 a.cmeq4s(A::v4, A::v3, A::v1);
1734 a.cmgt4s(A::v4, A::v3, A::v1);
1735
Mike Klein65809142019-06-25 09:44:02 -04001736 a.sub8h(A::v4, A::v3, A::v1);
1737 a.mul8h(A::v4, A::v3, A::v1);
1738
Mike Klein9f4df802019-06-24 18:47:16 -04001739 a.fadd4s(A::v4, A::v3, A::v1);
1740 a.fsub4s(A::v4, A::v3, A::v1);
1741 a.fmul4s(A::v4, A::v3, A::v1);
1742 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001743 a.fmin4s(A::v4, A::v3, A::v1);
1744 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein8d78da92020-11-25 13:53:20 -06001745
1746 a.fneg4s (A::v4, A::v3);
1747 a.fsqrt4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001748
Mike Klein65809142019-06-25 09:44:02 -04001749 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001750 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001751
1752 a.fcmeq4s(A::v4, A::v3, A::v1);
1753 a.fcmgt4s(A::v4, A::v3, A::v1);
1754 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001755 },{
Mike Klein65809142019-06-25 09:44:02 -04001756 0x64,0x1c,0x21,0x4e,
1757 0x64,0x1c,0xa1,0x4e,
1758 0x64,0x1c,0x21,0x6e,
1759 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001760 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001761 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001762
1763 0x64,0x84,0xa1,0x4e,
1764 0x64,0x84,0xa1,0x6e,
1765 0x64,0x9c,0xa1,0x4e,
1766
Mike Klein97afd2e2019-10-16 14:11:27 -05001767 0x64,0x8c,0xa1,0x6e,
1768 0x64,0x34,0xa1,0x4e,
1769
Mike Klein65809142019-06-25 09:44:02 -04001770 0x64,0x84,0x61,0x6e,
1771 0x64,0x9c,0x61,0x4e,
1772
Mike Klein9f4df802019-06-24 18:47:16 -04001773 0x64,0xd4,0x21,0x4e,
1774 0x64,0xd4,0xa1,0x4e,
1775 0x64,0xdc,0x21,0x6e,
1776 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001777 0x64,0xf4,0xa1,0x4e,
1778 0x64,0xf4,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001779
Mike Klein7c0332c2020-03-05 14:18:04 -06001780 0x64,0xf8,0xa0,0x6e,
Mike Klein8d78da92020-11-25 13:53:20 -06001781 0x64,0xf8,0xa1,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001782
Mike Klein65809142019-06-25 09:44:02 -04001783 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001784 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001785
1786 0x64,0xe4,0x21,0x4e,
1787 0x64,0xe4,0xa1,0x6e,
1788 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001789 });
1790
1791 test_asm(r, [&](A& a) {
1792 a.shl4s(A::v4, A::v3, 0);
1793 a.shl4s(A::v4, A::v3, 1);
1794 a.shl4s(A::v4, A::v3, 8);
1795 a.shl4s(A::v4, A::v3, 16);
1796 a.shl4s(A::v4, A::v3, 31);
1797
1798 a.sshr4s(A::v4, A::v3, 1);
1799 a.sshr4s(A::v4, A::v3, 8);
1800 a.sshr4s(A::v4, A::v3, 31);
1801
1802 a.ushr4s(A::v4, A::v3, 1);
1803 a.ushr4s(A::v4, A::v3, 8);
1804 a.ushr4s(A::v4, A::v3, 31);
1805
1806 a.ushr8h(A::v4, A::v3, 1);
1807 a.ushr8h(A::v4, A::v3, 8);
1808 a.ushr8h(A::v4, A::v3, 15);
1809 },{
1810 0x64,0x54,0x20,0x4f,
1811 0x64,0x54,0x21,0x4f,
1812 0x64,0x54,0x28,0x4f,
1813 0x64,0x54,0x30,0x4f,
1814 0x64,0x54,0x3f,0x4f,
1815
1816 0x64,0x04,0x3f,0x4f,
1817 0x64,0x04,0x38,0x4f,
1818 0x64,0x04,0x21,0x4f,
1819
1820 0x64,0x04,0x3f,0x6f,
1821 0x64,0x04,0x38,0x6f,
1822 0x64,0x04,0x21,0x6f,
1823
1824 0x64,0x04,0x1f,0x6f,
1825 0x64,0x04,0x18,0x6f,
1826 0x64,0x04,0x11,0x6f,
1827 });
1828
1829 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001830 a.sli4s(A::v4, A::v3, 0);
1831 a.sli4s(A::v4, A::v3, 1);
1832 a.sli4s(A::v4, A::v3, 8);
1833 a.sli4s(A::v4, A::v3, 16);
1834 a.sli4s(A::v4, A::v3, 31);
1835 },{
1836 0x64,0x54,0x20,0x6f,
1837 0x64,0x54,0x21,0x6f,
1838 0x64,0x54,0x28,0x6f,
1839 0x64,0x54,0x30,0x6f,
1840 0x64,0x54,0x3f,0x6f,
1841 });
1842
1843 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001844 a.scvtf4s (A::v4, A::v3);
1845 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001846 a.fcvtns4s(A::v4, A::v3);
Mike Klein8d78da92020-11-25 13:53:20 -06001847 a.frintp4s(A::v4, A::v3);
1848 a.frintm4s(A::v4, A::v3);
Mike Kleinec255632020-12-03 10:25:31 -06001849 a.fcvtn (A::v4, A::v3);
1850 a.fcvtl (A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001851 },{
1852 0x64,0xd8,0x21,0x4e,
1853 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001854 0x64,0xa8,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001855 0x64,0x88,0xa1,0x4e,
1856 0x64,0x98,0x21,0x4e,
Mike Kleinec255632020-12-03 10:25:31 -06001857 0x64,0x68,0x21,0x0e,
1858 0x64,0x78,0x21,0x0e,
Mike Klein9f4df802019-06-24 18:47:16 -04001859 });
Mike Klein15a368d2019-06-26 10:21:12 -04001860
1861 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001862 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1863 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1864 a.strq(A::v1, A::sp); // str q1, [sp]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001865 a.strd(A::v0, A::sp, 6); // str d0, [sp, #48]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001866 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001867 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001868 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1869 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001870 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001871 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001872 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001873 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1874 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001875 },{
1876 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001877 0xe0,0x07,0x80,0x3d,
1878 0xe1,0x03,0x80,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001879 0xe0,0x1b,0x00,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001880 0xe0,0x1b,0x00,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001881 0xe0,0x2b,0x00,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001882 0xe0,0xbf,0x00,0x3d,
1883 0xe9,0xab,0x40,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001884 0xe9,0xbf,0x40,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001885 0xe7,0x2b,0x40,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001886 0xe7,0x07,0x40,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001887 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001888 0xff,0x83,0x00,0x91,
1889 });
1890
1891 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001892 a.brk(0);
1893 a.brk(65535);
1894
Mike Klein15a368d2019-06-26 10:21:12 -04001895 a.ret(A::x30); // Conventional ret using link register.
1896 a.ret(A::x13); // Can really return using any register if we like.
1897
1898 a.add(A::x2, A::x2, 4);
1899 a.add(A::x3, A::x2, 32);
1900
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001901 a.sub(A::x2, A::x2, 4);
1902 a.sub(A::x3, A::x2, 32);
1903
Mike Klein15a368d2019-06-26 10:21:12 -04001904 a.subs(A::x2, A::x2, 4);
1905 a.subs(A::x3, A::x2, 32);
1906
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001907 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1908 a.cmp(A::x2, 4);
1909
Mike Kleinc74db792020-05-11 11:57:12 -05001910 A::Label l;
1911 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001912 a.bne(&l);
1913 a.bne(&l);
1914 a.blt(&l);
1915 a.b(&l);
1916 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001917 a.cbz(A::x2, &l);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001918
1919 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1920 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
Mike Klein15a368d2019-06-26 10:21:12 -04001921 },{
Mike Klein37be7712019-11-13 13:19:01 -06001922 0x00,0x00,0x20,0xd4,
1923 0xe0,0xff,0x3f,0xd4,
1924
Mike Klein15a368d2019-06-26 10:21:12 -04001925 0xc0,0x03,0x5f,0xd6,
1926 0xa0,0x01,0x5f,0xd6,
1927
1928 0x42,0x10,0x00,0x91,
1929 0x43,0x80,0x00,0x91,
1930
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001931 0x42,0x10,0x00,0xd1,
1932 0x43,0x80,0x00,0xd1,
1933
Mike Klein15a368d2019-06-26 10:21:12 -04001934 0x42,0x10,0x00,0xf1,
1935 0x43,0x80,0x00,0xf1,
1936
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001937 0x5f,0x10,0x00,0xf1,
1938 0x5f,0x10,0x00,0xf1,
1939
1940 0x01,0x00,0x00,0x54, // b.ne #0
1941 0xe1,0xff,0xff,0x54, // b.ne #-4
1942 0xcb,0xff,0xff,0x54, // b.lt #-8
1943 0xae,0xff,0xff,0x54, // b.al #-12
1944 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1945 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Kleindbc19ea2020-11-18 13:32:14 -06001946
1947 0x43,0x00,0x01,0x8b,
1948 0x43,0x0c,0x81,0x8b,
Mike Klein15a368d2019-06-26 10:21:12 -04001949 });
Mike Kleine51632e2019-06-26 14:47:43 -04001950
Mike Kleince7b88c2019-07-11 14:06:40 -05001951 // Can we cbz() to a not-yet-defined label?
1952 test_asm(r, [&](A& a) {
1953 A::Label l;
1954 a.cbz(A::x2, &l);
1955 a.add(A::x3, A::x2, 32);
1956 a.label(&l);
1957 a.ret(A::x30);
1958 },{
1959 0x42,0x00,0x00,0xb4, // cbz x2, #8
1960 0x43,0x80,0x00,0x91, // add x3, x2, #32
1961 0xc0,0x03,0x5f,0xd6, // ret
1962 });
1963
1964 // If we start a label as a backward label,
1965 // can we redefine it to be a future label?
1966 // (Not sure this is useful... just want to test it works.)
1967 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001968 A::Label l1;
1969 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001970 a.add(A::x3, A::x2, 32);
1971 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1972
Mike Kleinc74db792020-05-11 11:57:12 -05001973 A::Label l2; // Start off the same...
1974 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001975 a.add(A::x3, A::x2, 32);
1976 a.cbz(A::x2, &l2); // Looks like this will go backward...
1977 a.add(A::x2, A::x2, 4);
1978 a.add(A::x3, A::x2, 32);
1979 a.label(&l2); // But no... actually forward! What a switcheroo!
1980 },{
1981 0x43,0x80,0x00,0x91, // add x3, x2, #32
1982 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1983
1984 0x43,0x80,0x00,0x91, // add x3, x2, #32
1985 0x62,0x00,0x00,0xb4, // cbz x2, #12
1986 0x42,0x10,0x00,0x91, // add x2, x2, #4
1987 0x43,0x80,0x00,0x91, // add x3, x2, #32
1988 });
1989
Mike Klein81d52672019-07-30 11:11:09 -05001990 // Loading from a label on ARM.
1991 test_asm(r, [&](A& a) {
1992 A::Label fore,aft;
1993 a.label(&fore);
1994 a.word(0x01234567);
1995 a.ldrq(A::v1, &fore);
1996 a.ldrq(A::v2, &aft);
1997 a.label(&aft);
1998 a.word(0x76543210);
1999 },{
2000 0x67,0x45,0x23,0x01,
2001 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
2002 0x22,0x00,0x00,0x9c, // ldr q2, #4
2003 0x10,0x32,0x54,0x76,
2004 });
2005
Mike Kleine51632e2019-06-26 14:47:43 -04002006 test_asm(r, [&](A& a) {
2007 a.ldrq(A::v0, A::x8);
2008 a.strq(A::v0, A::x8);
2009 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05002010 0x00,0x01,0xc0,0x3d,
2011 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04002012 });
Mike Klein1fa149a2019-07-01 11:18:08 -05002013
2014 test_asm(r, [&](A& a) {
Mike Klein8d78da92020-11-25 13:53:20 -06002015 a.dup4s (A::v0, A::x8);
Mike Kleindbc19ea2020-11-18 13:32:14 -06002016 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
2017 a.ld1r8h (A::v0, A::x8);
2018 a.ld1r16b(A::v0, A::x8);
2019 },{
Mike Klein8d78da92020-11-25 13:53:20 -06002020 0x00,0x0d,0x04,0x4e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06002021 0x00,0xc9,0x40,0x4d,
2022 0x00,0xc5,0x40,0x4d,
2023 0x00,0xc1,0x40,0x4d,
2024 });
2025
2026 test_asm(r, [&](A& a) {
Mike Kleindd069a92021-01-20 13:51:33 -06002027 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
2028 a.ld44s(A::v0, A::x8);
2029 a.st24s(A::v0, A::x8);
2030 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
Mike Kleinf988bb52021-01-27 12:53:34 -06002031
2032 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
2033 a.ld24s(A::v0, A::x8, 1);
2034 a.ld24s(A::v0, A::x8, 2);
2035 a.ld24s(A::v0, A::x8, 3);
2036
2037 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
2038 a.ld44s(A::v0, A::x8, 1);
2039 a.ld44s(A::v0, A::x8, 2);
2040 a.ld44s(A::v0, A::x8, 3);
Mike Kleindd069a92021-01-20 13:51:33 -06002041 },{
2042 0x00,0x89,0x40,0x4c,
2043 0x00,0x09,0x40,0x4c,
2044 0x00,0x89,0x00,0x4c,
2045 0x00,0x09,0x00,0x4c,
Mike Kleinf988bb52021-01-27 12:53:34 -06002046
2047 0x00,0x81,0x60,0x0d,
2048 0x00,0x91,0x60,0x0d,
2049 0x00,0x81,0x60,0x4d,
2050 0x00,0x91,0x60,0x4d,
2051
2052 0x00,0xa1,0x60,0x0d,
2053 0x00,0xb1,0x60,0x0d,
2054 0x00,0xa1,0x60,0x4d,
2055 0x00,0xb1,0x60,0x4d,
Mike Kleindd069a92021-01-20 13:51:33 -06002056 });
2057
2058 test_asm(r, [&](A& a) {
Mike Klein1fa149a2019-07-01 11:18:08 -05002059 a.xtns2h(A::v0, A::v0);
2060 a.xtnh2b(A::v0, A::v0);
2061 a.strs (A::v0, A::x0);
2062
2063 a.ldrs (A::v0, A::x0);
2064 a.uxtlb2h(A::v0, A::v0);
2065 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06002066
2067 a.uminv4s(A::v3, A::v4);
Mike Kleindbc19ea2020-11-18 13:32:14 -06002068 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
2069 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
2070 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
Mike Klein1fa149a2019-07-01 11:18:08 -05002071 },{
2072 0x00,0x28,0x61,0x0e,
2073 0x00,0x28,0x21,0x0e,
2074 0x00,0x00,0x00,0xbd,
2075
2076 0x00,0x00,0x40,0xbd,
2077 0x00,0xa4,0x08,0x2f,
2078 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06002079
2080 0x83,0xa8,0xb1,0x6e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06002081 0x83,0x3c,0x04,0x0e,
2082 0x83,0x3c,0x0c,0x0e,
2083 0x64,0x1c,0x1c,0x4e,
Mike Klein1fa149a2019-07-01 11:18:08 -05002084 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05002085
2086 test_asm(r, [&](A& a) {
2087 a.ldrb(A::v0, A::x8);
2088 a.strb(A::v0, A::x8);
2089 },{
2090 0x00,0x01,0x40,0x3d,
2091 0x00,0x01,0x00,0x3d,
2092 });
Mike Klein81d52672019-07-30 11:11:09 -05002093
2094 test_asm(r, [&](A& a) {
Mike Kleindbc19ea2020-11-18 13:32:14 -06002095 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
2096 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
2097 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
2098 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
Mike Kleina7470df2020-12-03 12:06:27 -06002099
2100 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
Mike Kleindbc19ea2020-11-18 13:32:14 -06002101 },{
2102 0x20,0x0c,0x40,0xf9,
2103 0x20,0x0c,0x40,0xb9,
2104 0x20,0x0c,0x40,0x79,
2105 0x20,0x0c,0x40,0x39,
Mike Kleina7470df2020-12-03 12:06:27 -06002106
2107 0x20,0x0c,0x00,0xb9,
Mike Kleindbc19ea2020-11-18 13:32:14 -06002108 });
2109
2110 test_asm(r, [&](A& a) {
Mike Kleinf5097db2020-12-03 09:21:00 -06002111 a.tbl (A::v0, A::v1, A::v2);
Mike Kleinc7bca522020-12-03 10:01:29 -06002112 a.uzp14s(A::v0, A::v1, A::v2);
2113 a.uzp24s(A::v0, A::v1, A::v2);
Mike Kleinf5097db2020-12-03 09:21:00 -06002114 a.zip14s(A::v0, A::v1, A::v2);
2115 a.zip24s(A::v0, A::v1, A::v2);
Mike Klein81d52672019-07-30 11:11:09 -05002116 },{
2117 0x20,0x00,0x02,0x4e,
Mike Kleinc7bca522020-12-03 10:01:29 -06002118 0x20,0x18,0x82,0x4e,
2119 0x20,0x58,0x82,0x4e,
Mike Kleinf5097db2020-12-03 09:21:00 -06002120 0x20,0x38,0x82,0x4e,
2121 0x20,0x78,0x82,0x4e,
Mike Klein81d52672019-07-30 11:11:09 -05002122 });
Mike Klein05642042019-06-18 12:16:06 -05002123}
Mike Reedbcb46c02020-03-23 17:51:01 -04002124
2125DEF_TEST(SkVM_approx_math, r) {
2126 auto eval = [](int N, float values[], auto fn) {
2127 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002128 skvm::Ptr inout = b.varying<float>();
Mike Reedbcb46c02020-03-23 17:51:01 -04002129
2130 b.storeF(inout, fn(&b, b.loadF(inout)));
2131
2132 b.done().eval(N, values);
2133 };
2134
2135 auto compare = [r](int N, const float values[], const float expected[]) {
2136 for (int i = 0; i < N; ++i) {
John Stilesf6bb6192022-01-20 19:50:13 -05002137 REPORTER_ASSERT(r, (values[i] == expected[i]) ||
2138 SkScalarNearlyEqual(values[i], expected[i], 0.001f),
2139 "evaluated to %g, but expected %g", values[i], expected[i]);
Mike Reedbcb46c02020-03-23 17:51:01 -04002140 }
2141 };
2142
2143 // log2
2144 {
2145 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
2146 constexpr int N = SK_ARRAY_COUNT(values);
2147 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2148 return b->approx_log2(v);
2149 });
2150 const float expected[] = {-2, -1, 0, 1, 2, 3};
2151 compare(N, values, expected);
2152 }
2153
2154 // pow2
2155 {
John Stilesf6bb6192022-01-20 19:50:13 -05002156 float values[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
Mike Reedbcb46c02020-03-23 17:51:01 -04002157 constexpr int N = SK_ARRAY_COUNT(values);
2158 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2159 return b->approx_pow2(v);
2160 });
John Stilesf6bb6192022-01-20 19:50:13 -05002161 const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
Mike Reedbcb46c02020-03-23 17:51:01 -04002162 compare(N, values, expected);
2163 }
John Stiles260d9252022-01-31 17:44:10 -05002164 // powf -- 1^x
Mike Reedbcb46c02020-03-23 17:51:01 -04002165 {
John Stiles260d9252022-01-31 17:44:10 -05002166 float exps[] = {-2, -1, 0, 1, 2};
2167 constexpr int N = SK_ARRAY_COUNT(exps);
2168 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2169 return b->approx_powf(b->splat(1.0f), exp);
Mike Reedbcb46c02020-03-23 17:51:01 -04002170 });
John Stiles260d9252022-01-31 17:44:10 -05002171 const float expected[] = {1, 1, 1, 1, 1};
2172 compare(N, exps, expected);
2173 }
2174 // powf -- 2^x
2175 {
2176 float exps[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
2177 constexpr int N = SK_ARRAY_COUNT(exps);
2178 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2179 return b->approx_powf(2.0, exp);
2180 });
2181 const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
2182 compare(N, exps, expected);
Mike Reedbcb46c02020-03-23 17:51:01 -04002183 }
2184 // powf -- 3^x
2185 {
2186 float exps[] = {-2, -1, 0, 1, 2};
2187 constexpr int N = SK_ARRAY_COUNT(exps);
2188 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2189 return b->approx_powf(b->splat(3.0f), exp);
2190 });
2191 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
2192 compare(N, exps, expected);
2193 }
John Stiles260d9252022-01-31 17:44:10 -05002194 // powf -- x^0.5
2195 {
2196 float bases[] = {0, 1, 4, 9, 16};
2197 constexpr int N = SK_ARRAY_COUNT(bases);
2198 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2199 return b->approx_powf(base, b->splat(0.5f));
2200 });
2201 const float expected[] = {0, 1, 2, 3, 4};
2202 compare(N, bases, expected);
2203 }
2204 // powf -- x^1
2205 {
2206 float bases[] = {0, 1, 2, 3, 4};
2207 constexpr int N = SK_ARRAY_COUNT(bases);
2208 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2209 return b->approx_powf(base, b->splat(1.0f));
2210 });
2211 const float expected[] = {0, 1, 2, 3, 4};
2212 compare(N, bases, expected);
2213 }
2214 // powf -- x^2
2215 {
2216 float bases[] = {0, 1, 2, 3, 4};
2217 constexpr int N = SK_ARRAY_COUNT(bases);
2218 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2219 return b->approx_powf(base, b->splat(2.0f));
2220 });
2221 const float expected[] = {0, 1, 4, 9, 16};
2222 compare(N, bases, expected);
2223 }
Mike Reed82ff25e2020-04-07 13:51:41 -04002224
Mike Reedd468a162020-04-11 14:14:00 -04002225 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04002226 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002227 skvm::Ptr inout = b.varying<float>();
Mike Reed82ff25e2020-04-07 13:51:41 -04002228 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04002229 float actual = arg;
2230 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04002231
Mike Reedd468a162020-04-11 14:14:00 -04002232 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04002233
2234 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04002235 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04002236 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04002237 }
Mike Reed1b84ef22020-04-13 17:56:24 -04002238 return err;
2239 };
2240
2241 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2242 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002243 skvm::Ptr in0 = b.varying<float>();
2244 skvm::Ptr in1 = b.varying<float>();
2245 skvm::Ptr out = b.varying<float>();
Mike Reed1b84ef22020-04-13 17:56:24 -04002246 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2247 float actual;
2248 b.done().eval(1, &arg0, &arg1, &actual);
2249
2250 float err = std::abs(actual - expected);
2251
2252 if (err > tolerance) {
2253 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2254 REPORTER_ASSERT(r, true);
2255 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002256 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002257 };
2258
Mike Reed801ba0d2020-04-10 12:37:36 -04002259 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002260 {
2261 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002262 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002263 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2264 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2265 return approx_sin(x);
2266 });
2267 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2268 return approx_cos(x);
2269 });
2270 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002271
2272 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2273 // so bring in the domain a little.
2274 constexpr float eps = 0.16f;
2275 float err = 0;
2276 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2277 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2278 return approx_tan(x);
2279 });
2280 // try again with some multiples of P, to check our periodicity
2281 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2282 return approx_tan(x + 3*P);
2283 });
2284 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2285 return approx_tan(x - 3*P);
2286 });
2287 }
John Stiles4250eff2022-02-04 16:09:04 -05002288 if ((false)) { SkDebugf("tan error %g\n", err); }
Mike Reedd468a162020-04-11 14:14:00 -04002289 }
2290
2291 // asin, acos, atan
2292 {
2293 constexpr float tol = 0.00175f;
2294 float err = 0;
2295 for (float x = -1; x <= 1; x += 1.0f/64) {
2296 err += test(x, asin(x), tol, [](skvm::F32 x) {
2297 return approx_asin(x);
2298 });
2299 test(x, acos(x), tol, [](skvm::F32 x) {
2300 return approx_acos(x);
2301 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002302 }
John Stiles4250eff2022-02-04 16:09:04 -05002303 if ((false)) { SkDebugf("asin error %g\n", err); }
Mike Reedd468a162020-04-11 14:14:00 -04002304
2305 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002306 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002307 err += test(x, atan(x), tol, [](skvm::F32 x) {
2308 return approx_atan(x);
2309 });
2310 }
John Stiles4250eff2022-02-04 16:09:04 -05002311 if ((false)) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002312
2313 for (float y = -3; y <= 3; y += 1) {
2314 for (float x = -3; x <= 3; x += 1) {
2315 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002316 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002317 });
2318 }
2319 }
John Stiles4250eff2022-02-04 16:09:04 -05002320 if ((false)) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002321 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002322}
Mike Klein210288f2020-04-08 11:31:07 -05002323
2324DEF_TEST(SkVM_min_max, r) {
2325 // min() and max() have subtle behavior when one argument is NaN and
2326 // the other isn't. It's not sound to blindly swap their arguments.
2327 //
2328 // All backends must behave like std::min() and std::max(), which are
2329 //
2330 // min(x,y) = y<x ? y : x
2331 // max(x,y) = x<y ? y : x
2332
2333 // ±NaN, ±0, ±1, ±inf
2334 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2335 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2336
2337 float f[8];
2338 memcpy(f, bits, sizeof(bits));
2339
2340 auto identical = [&](float x, float y) {
2341 uint32_t X,Y;
2342 memcpy(&X, &x, 4);
2343 memcpy(&Y, &y, 4);
2344 return X == Y;
2345 };
2346
2347 // Test min/max with non-constant x, non-constant y.
2348 // (Whether x and y are varying or uniform shouldn't make any difference.)
2349 {
2350 skvm::Builder b;
2351 {
Mike Klein00e43df2021-01-08 13:45:42 -06002352 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002353 mn = b.varying<float>(),
2354 mx = b.varying<float>();
2355
2356 skvm::F32 x = b.loadF(src),
2357 y = b.uniformF(b.uniform(), 0);
2358
2359 b.storeF(mn, b.min(x,y));
2360 b.storeF(mx, b.max(x,y));
2361 }
2362
Mike Kleinfc017c72021-02-08 10:45:19 -06002363 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002364 float mn[8], mx[8];
2365 for (int i = 0; i < 8; i++) {
2366 // min() and max() everything with f[i].
2367 program.eval(8, f,mn,mx, &f[i]);
2368
2369 for (int j = 0; j < 8; j++) {
2370 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2371 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2372 }
2373 }
2374 });
2375 }
2376
2377 // Test each with constant on the right.
2378 for (int i = 0; i < 8; i++) {
2379 skvm::Builder b;
2380 {
Mike Klein00e43df2021-01-08 13:45:42 -06002381 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002382 mn = b.varying<float>(),
2383 mx = b.varying<float>();
2384
2385 skvm::F32 x = b.loadF(src),
2386 y = b.splat(f[i]);
2387
2388 b.storeF(mn, b.min(x,y));
2389 b.storeF(mx, b.max(x,y));
2390 }
2391
Mike Kleinfc017c72021-02-08 10:45:19 -06002392 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002393 float mn[8], mx[8];
2394 program.eval(8, f,mn,mx);
2395 for (int j = 0; j < 8; j++) {
2396 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2397 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2398 }
2399 });
2400 }
2401
2402 // Test each with constant on the left.
2403 for (int i = 0; i < 8; i++) {
2404 skvm::Builder b;
2405 {
Mike Klein00e43df2021-01-08 13:45:42 -06002406 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002407 mn = b.varying<float>(),
2408 mx = b.varying<float>();
2409
2410 skvm::F32 x = b.splat(f[i]),
2411 y = b.loadF(src);
2412
2413 b.storeF(mn, b.min(x,y));
2414 b.storeF(mx, b.max(x,y));
2415 }
2416
Mike Kleinfc017c72021-02-08 10:45:19 -06002417 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002418 float mn[8], mx[8];
2419 program.eval(8, f,mn,mx);
2420 for (int j = 0; j < 8; j++) {
2421 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2422 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2423 }
2424 });
2425 }
2426}
Mike Klein4d680cd2020-07-15 09:58:51 -05002427
2428DEF_TEST(SkVM_halfs, r) {
2429 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2430 0xc400,0xb800,0xbc00,0xc000};
2431 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2432 -4.0f,-0.5f,-1.0f,-2.0f};
2433 {
2434 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002435 skvm::Ptr src = b.varying<uint16_t>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002436 dst = b.varying<float>();
Mike Klein42d67a62020-12-01 10:14:55 -06002437 b.storeF(dst, b.from_fp16(b.load16(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002438
Mike Kleinfc017c72021-02-08 10:45:19 -06002439 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002440 float dst[8];
2441 program.eval(8, hs, dst);
2442 for (int i = 0; i < 8; i++) {
2443 REPORTER_ASSERT(r, dst[i] == fs[i]);
2444 }
2445 });
2446 }
2447 {
2448 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002449 skvm::Ptr src = b.varying<float>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002450 dst = b.varying<uint16_t>();
Mike Klein42d67a62020-12-01 10:14:55 -06002451 b.store16(dst, b.to_fp16(b.loadF(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002452
Mike Kleinfc017c72021-02-08 10:45:19 -06002453 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002454 uint16_t dst[8];
2455 program.eval(8, fs, dst);
2456 for (int i = 0; i < 8; i++) {
2457 REPORTER_ASSERT(r, dst[i] == hs[i]);
2458 }
2459 });
2460 }
2461}
Mike Klein6732da02020-07-16 13:03:18 -05002462
2463DEF_TEST(SkVM_64bit, r) {
2464 uint32_t lo[65],
2465 hi[65];
2466 uint64_t wide[65];
2467 for (int i = 0; i < 65; i++) {
2468 lo[i] = 2*i+0;
2469 hi[i] = 2*i+1;
2470 wide[i] = ((uint64_t)lo[i] << 0)
2471 | ((uint64_t)hi[i] << 32);
2472 }
2473
2474 {
2475 skvm::Builder b;
2476 {
John Stiles68f56062021-08-03 12:31:56 -04002477 skvm::Ptr widePtr = b.varying<uint64_t>(),
2478 loPtr = b.varying<int>(),
2479 hiPtr = b.varying<int>();
2480 b.store32(loPtr, b.load64(widePtr, 0));
2481 b.store32(hiPtr, b.load64(widePtr, 1));
Mike Klein6732da02020-07-16 13:03:18 -05002482 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002483 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002484 uint32_t l[65], h[65];
2485 program.eval(65, wide,l,h);
2486 for (int i = 0; i < 65; i++) {
2487 REPORTER_ASSERT(r, l[i] == lo[i]);
2488 REPORTER_ASSERT(r, h[i] == hi[i]);
2489 }
2490 });
2491 }
2492
2493 {
2494 skvm::Builder b;
2495 {
John Stiles68f56062021-08-03 12:31:56 -04002496 skvm::Ptr widePtr = b.varying<uint64_t>(),
2497 loPtr = b.varying<int>(),
2498 hiPtr = b.varying<int>();
2499 b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr));
Mike Klein6732da02020-07-16 13:03:18 -05002500 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002501 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002502 uint64_t w[65];
2503 program.eval(65, w,lo,hi);
2504 for (int i = 0; i < 65; i++) {
2505 REPORTER_ASSERT(r, w[i] == wide[i]);
2506 }
2507 });
2508 }
2509}
Mike Kleine942b8c2020-07-21 10:17:14 -05002510
Mike Kleinb19518d2020-12-03 14:39:41 -06002511DEF_TEST(SkVM_128bit, r) {
2512 float floats[4*63];
2513 uint8_t packed[4*63];
2514
2515 for (int i = 0; i < 4*63; i++) {
2516 floats[i] = i * (1/255.0f);
2517 }
2518
Mike Klein447f3312021-02-08 09:46:59 -06002519 skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType),
2520 rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);
Mike Kleinb19518d2020-12-03 14:39:41 -06002521
2522 { // Convert RGBA F32 to RGBA 8888, testing 128-bit loads.
2523 skvm::Builder b;
2524 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002525 skvm::Ptr dst = b.varying(4),
2526 src = b.varying(16);
Mike Kleinb19518d2020-12-03 14:39:41 -06002527
2528 skvm::Color c = b.load(rgba_ffff, src);
2529 b.store(rgba_8888, dst, c);
2530 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002531 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002532 memset(packed, 0, sizeof(packed));
2533 program.eval(63, packed, floats);
2534 for (int i = 0; i < 4*63; i++) {
2535 REPORTER_ASSERT(r, packed[i] == i);
2536 }
2537 });
2538 }
2539
2540
2541 { // Convert RGBA 8888 to RGBA F32, testing 128-bit stores.
2542 skvm::Builder b;
2543 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002544 skvm::Ptr dst = b.varying(16),
2545 src = b.varying(4);
Mike Kleinb19518d2020-12-03 14:39:41 -06002546
2547 skvm::Color c = b.load(rgba_8888, src);
2548 b.store(rgba_ffff, dst, c);
2549 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002550 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002551 memset(floats, 0, sizeof(floats));
2552 program.eval(63, floats, packed);
2553 for (int i = 0; i < 4*63; i++) {
2554 REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
2555 }
2556 });
2557 }
2558
2559}
2560
Mike Kleine942b8c2020-07-21 10:17:14 -05002561DEF_TEST(SkVM_is_NaN_is_finite, r) {
2562 skvm::Builder b;
2563 {
Mike Klein00e43df2021-01-08 13:45:42 -06002564 skvm::Ptr src = b.varying<float>(),
Mike Kleine942b8c2020-07-21 10:17:14 -05002565 nan = b.varying<int>(),
2566 fin = b.varying<int>();
2567 b.store32(nan, is_NaN (b.loadF(src)));
2568 b.store32(fin, is_finite(b.loadF(src)));
2569 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002570 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleine942b8c2020-07-21 10:17:14 -05002571 // ±NaN, ±0, ±1, ±inf
2572 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2573 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2574 uint32_t nan[8], fin[8];
2575 program.eval(8, bits, nan,fin);
2576
2577 for (int i = 0; i < 8; i++) {
2578 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2579 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2580 i == 4 || i == 5) ? 0xffffffff : 0));
2581 }
2582 });
2583}
Mike Klein0cfd5032020-07-28 11:08:27 -05002584
2585DEF_TEST(SkVM_args, r) {
2586 // Test we can handle at least six arguments.
2587 skvm::Builder b;
2588 {
Mike Klein00e43df2021-01-08 13:45:42 -06002589 skvm::Ptr dst = b.varying<float>(),
Mike Klein0cfd5032020-07-28 11:08:27 -05002590 A = b.varying<float>(),
2591 B = b.varying<float>(),
2592 C = b.varying<float>(),
2593 D = b.varying<float>(),
2594 E = b.varying<float>();
2595 storeF(dst, b.loadF(A)
2596 + b.loadF(B)
2597 + b.loadF(C)
2598 + b.loadF(D)
2599 + b.loadF(E));
2600 }
2601
Mike Kleinfc017c72021-02-08 10:45:19 -06002602 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein0cfd5032020-07-28 11:08:27 -05002603 float dst[17],A[17],B[17],C[17],D[17],E[17];
2604 for (int i = 0; i < 17; i++) {
2605 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2606 }
2607 program.eval(17, dst,A,B,C,D,E);
2608 for (int i = 0; i < 17; i++) {
2609 REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2610 }
2611 });
2612}
Mike Klein9791e502020-09-15 12:43:38 -05002613
John Stiles68f56062021-08-03 12:31:56 -04002614DEF_TEST(SkVM_badpack, reporter) {
Mike Kleinee40ec62020-11-20 15:34:16 -06002615 // Test case distilled from actual failing draw,
2616 // originally with a bad arm64 implementation of pack().
2617 skvm::Builder p;
2618 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002619 skvm::UPtr uniforms = p.uniform();
2620 skvm::Ptr dst = p.varying<uint16_t>();
Mike Kleinee40ec62020-11-20 15:34:16 -06002621
Mike Klein5ec9c4e2020-12-01 10:43:46 -06002622 skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
Mike Kleinee40ec62020-11-20 15:34:16 -06002623 a = p.splat(0xf);
2624
2625 skvm::I32 _4444 = p.splat(0);
2626 _4444 = pack(_4444, r, 12);
2627 _4444 = pack(_4444, a, 0);
2628 store16(dst, _4444);
2629 }
2630
Mike Kleinfc017c72021-02-08 10:45:19 -06002631 test_jit_and_interpreter(p, [&](const skvm::Program& program){
Mike Kleinee40ec62020-11-20 15:34:16 -06002632 const float uniforms[] = { 0.0f, 0.0f,
2633 1.0f, 0.0f, 0.0f, 1.0f };
2634
2635 uint16_t dst[17] = {0};
2636 program.eval(17, uniforms,dst);
2637 for (int i = 0; i < 17; i++) {
John Stiles68f56062021-08-03 12:31:56 -04002638 REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
Mike Kleinee40ec62020-11-20 15:34:16 -06002639 }
2640 });
2641}
Mike Klein960bd2d2020-12-21 14:33:55 -06002642
2643DEF_TEST(SkVM_features, r) {
2644 auto build_program = [](skvm::Builder* b) {
2645 skvm::F32 x = b->loadF(b->varying<float>());
2646 b->storeF(b->varying<float>(), x*x+x);
2647 };
2648
2649 { // load-fma-store with FMA available.
2650 skvm::Features features;
2651 features.fma = true;
2652 skvm::Builder b(features);
2653 build_program(&b);
2654 REPORTER_ASSERT(r, b.optimize().size() == 3);
2655 }
2656
2657 { // load-mul-add-store without FMA.
2658 skvm::Features features;
2659 features.fma = false;
2660 skvm::Builder b(features);
2661 build_program(&b);
2662 REPORTER_ASSERT(r, b.optimize().size() == 4);
2663 }
2664
2665 { // Auto-detected, could be either.
2666 skvm::Builder b;
2667 build_program(&b);
2668 REPORTER_ASSERT(r, b.optimize().size() == 3
2669 || b.optimize().size() == 4);
2670 }
2671}
Mike Klein0a804272021-01-06 10:36:22 -06002672
2673DEF_TEST(SkVM_gather_can_hoist, r) {
2674 // A gather instruction isn't necessarily varying... it's whatever its index is.
2675 // First a typical gather scenario with varying index.
2676 {
2677 skvm::Builder b;
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002678 skvm::UPtr uniforms = b.uniform();
2679 skvm::Ptr buf = b.varying<int>();
Mike Klein0a804272021-01-06 10:36:22 -06002680 skvm::I32 ix = b.load32(buf);
2681 b.store32(buf, b.gather32(uniforms,0, ix));
2682
2683 skvm::Program p = b.done();
2684
2685 // ix is varying, so the gather is too.
2686 //
2687 // loop:
2688 // v0 = load32 buf
2689 // v1 = gather32 uniforms+0 v0
2690 // store32 buf v1
2691 REPORTER_ASSERT(r, p.instructions().size() == 3);
2692 REPORTER_ASSERT(r, p.loop() == 0);
2693 }
2694
2695 // Now the same but with a uniform index instead.
2696 {
2697 skvm::Builder b;
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002698 skvm::UPtr uniforms = b.uniform();
2699 skvm::Ptr buf = b.varying<int>();
Mike Klein0a804272021-01-06 10:36:22 -06002700 skvm::I32 ix = b.uniform32(uniforms,8);
2701 b.store32(buf, b.gather32(uniforms,0, ix));
2702
2703 skvm::Program p = b.done();
2704
2705 // ix is uniform, so the gather is too.
2706 //
2707 // v0 = uniform32 uniforms+8
2708 // v1 = gather32 uniforms+0 v0
2709 // loop:
2710 // store32 buf v1
2711 REPORTER_ASSERT(r, p.instructions().size() == 3);
2712 REPORTER_ASSERT(r, p.loop() == 2);
2713 }
2714}
Mike Klein279ca2e2021-01-06 10:57:19 -06002715
2716DEF_TEST(SkVM_dont_dedup_loads, r) {
2717 // We've been assuming that all Ops with the same arguments produce the same value
2718 // and deduplicating them, which results in a simple common subexpression eliminator.
2719 //
2720 // But we can't soundly dedup two identical loads with a store between.
2721 // If we dedup the loads in this test program it will always increment by 1, not K.
2722 constexpr int K = 2;
2723 skvm::Builder b;
2724 {
Mike Klein00e43df2021-01-08 13:45:42 -06002725 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002726 for (int i = 0; i < K; i++) {
2727 b.store32(buf, b.load32(buf) + 1);
2728 }
2729 }
2730
Mike Kleinfc017c72021-02-08 10:45:19 -06002731 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002732 int buf[] = { 0,1,2,3,4 };
2733 program.eval(SK_ARRAY_COUNT(buf), buf);
2734 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
2735 REPORTER_ASSERT(r, buf[i] == i+K);
2736 }
2737 });
2738}
2739
2740DEF_TEST(SkVM_dont_dedup_stores, r) {
2741 // Following a similar line of reasoning to SkVM_dont_dedup_loads,
2742 // we cannot dedup stores either. A different store between two identical stores
2743 // will invalidate the first store, meaning we do need to reissue that store operation.
2744 skvm::Builder b;
2745 {
Mike Klein00e43df2021-01-08 13:45:42 -06002746 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002747 b.store32(buf, b.splat(4));
2748 b.store32(buf, b.splat(5));
2749 b.store32(buf, b.splat(4)); // If we dedup'd, we'd skip this store.
2750 }
2751
Mike Kleinfc017c72021-02-08 10:45:19 -06002752 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002753 int buf[42];
2754 program.eval(SK_ARRAY_COUNT(buf), buf);
2755 for (int x : buf) {
2756 REPORTER_ASSERT(r, x == 4);
2757 }
2758 });
2759}
Mike Kleinff4decc2021-02-10 16:13:35 -06002760
2761DEF_TEST(SkVM_fast_mul, r) {
2762 skvm::Builder b;
2763 {
2764 skvm::Ptr src = b.varying<float>(),
2765 fast = b.varying<float>(),
2766 slow = b.varying<float>();
2767 skvm::F32 x = b.loadF(src);
2768 b.storeF(fast, fast_mul(0.0f, x));
2769 b.storeF(slow, 0.0f * x);
2770 }
2771 test_jit_and_interpreter(b, [&](const skvm::Program& program){
2772 const uint32_t bits[] = {
2773 0x0000'0000, 0x8000'0000, //±0
2774 0x3f80'0000, 0xbf80'0000, //±1
2775 0x7f80'0000, 0xff80'0000, //±inf
2776 0x7f80'0001, 0xff80'0001, //±NaN
2777 };
2778 float fast[8],
2779 slow[8];
2780 program.eval(8,bits,fast,slow);
2781
2782 for (int i = 0; i < 8; i++) {
2783 REPORTER_ASSERT(r, fast[i] == 0.0f);
2784
2785 if (i < 4) {
2786 REPORTER_ASSERT(r, slow[i] == 0.0f);
2787 } else {
2788 REPORTER_ASSERT(r, isnan(slow[i]));
2789 }
2790 }
2791 });
2792}
Julia Lavrovaf74c7892021-12-13 15:01:32 -05002793
2794DEF_TEST(SkVM_duplicates, reporter) {
2795 {
2796 skvm::Builder p(true);
2797 auto rptr = p.varying<int>();
2798
2799 skvm::F32 r = p.loadF(rptr),
2800 g = p.splat(0.0f),
2801 b = p.splat(0.0f),
2802 a = p.splat(1.0f);
2803
2804 p.unpremul(&r, &g, &b, a);
2805 p.storeF(rptr, r);
2806
2807 std::vector<skvm::Instruction> program = b->program();
2808
2809 auto withDuplicates = skvm::finalize(program);
2810 int duplicates = 0;
2811 for (const auto& instr : withDuplicates) {
2812 if (instr.op == skvm::Op::duplicate) {
2813 ++duplicates;
2814 }
2815 }
2816 REPORTER_ASSERT(reporter, duplicates > 0);
2817
2818 auto eliminatedAsDeadCode = skvm::eliminate_dead_code(program);
2819 for (const auto& instr : eliminatedAsDeadCode) {
2820 REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
2821 }
2822 }
2823
2824 {
2825 skvm::Builder p(false);
2826 auto rptr = p.varying<int>();
2827
2828 skvm::F32 r = p.loadF(rptr),
2829 g = p.splat(0.0f),
2830 b = p.splat(0.0f),
2831 a = p.splat(1.0f);
2832
2833 p.unpremul(&r, &g, &b, a);
2834 p.storeF(rptr, r);
2835
2836 auto withoutDuplicates = p.done().instructions();
2837 for (const auto& instr : withoutDuplicates) {
2838 REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
2839 }
2840 }
2841}
Julia Lavrova20187a22021-12-21 14:33:35 +00002842
Brian Osman4c4cf432021-12-21 13:28:14 -05002843DEF_TEST(SkVM_Visualizer, r) {
Julia Lavrova20187a22021-12-21 14:33:35 +00002844 const char* src =
2845 "int main(int x, int y) {\n"
2846 " int a = 99;\n"
2847 " if (x > 0) a += 100;\n"
2848 " if (y > 0) a += 101;\n"
2849 " a = 102;\n"
2850 " return a;\n"
2851 "}";
2852 GrShaderCaps caps;
2853 SkSL::Compiler compiler(&caps);
2854 SkSL::Program::Settings settings;
2855 auto program = compiler.convertProgram(SkSL::ProgramKind::kGeneric,
John Stilesffeb6f22022-02-02 16:51:18 -05002856 std::string(src), settings);
Julia Lavrova20187a22021-12-21 14:33:35 +00002857 const SkSL::FunctionDefinition* main = SkSL::Program_GetFunction(*program, "main");
2858 SkSL::SkVMDebugTrace d;
2859 d.setSource(src);
2860 auto v = std::make_unique<skvm::viz::Visualizer>(&d);
2861 skvm::Builder b(skvm::Features{}, /*createDuplicates=*/true);
2862 SkSL::ProgramToSkVM(*program, *main, &b, &d, /*uniforms=*/{});
2863
2864 skvm::Program p = b.done(nullptr, true, std::move(v));
2865#if defined(SKVM_JIT)
2866 SkDynamicMemoryWStream asmFile;
2867 p.disassemble(&asmFile);
2868 auto dumpData = asmFile.detachAsData();
2869 std::string dumpString((const char*)dumpData->data(), dumpData->size());
2870#else
Brian Osman4c4cf432021-12-21 13:28:14 -05002871 std::string dumpString;
Julia Lavrova20187a22021-12-21 14:33:35 +00002872#endif
2873 SkDynamicMemoryWStream vizFile;
2874 p.visualizer()->dump(&vizFile, dumpString.c_str());
2875 auto vizData = vizFile.detachAsData();
2876 std::string html((const char*)vizData->data(), vizData->size());
2877 //b.dump();
2878 //std::printf(html.c_str());
2879 // Check that html contains all types of information:
2880 if (!dumpString.empty() && !std::strstr(dumpString.c_str(), "Program not JIT'd.")) {
2881 REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='machine'>")); // machine commands
2882 }
2883 REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='normal'>")); // SkVM byte code
2884 REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='source'>")); // C++ source
2885 REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='dead'>")); // dead code
2886 REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='dead deduped'>")); // deduped removed
2887 REPORTER_ASSERT(r, std::strstr(html.c_str(), // deduped origins
2888 "<tr class='normal origin'>"
2889 "<td>&#8593;&#8593;&#8593; *13</td>"
2890 "<td>v2 = splat 0 (0)</td></tr>"));
2891 REPORTER_ASSERT(r, std::strstr(html.c_str(), // trace enter
2892 "<tr class='source'><td class='mask'>&#8618;v9</td>"
2893 "<td colspan=2>int main(int x, int y)</td></tr>"));
2894 REPORTER_ASSERT(r, std::strstr(html.c_str(), // trace exit
2895 "<tr class='source'><td class='mask'>&#8617;v9</td>"
2896 "<td colspan=2>int main(int x, int y)</td></tr>"));
2897}