blob: c8734d02966a392af320530f3823df2c369c51b1 [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein7e650762019-07-02 15:21:11 -050014
Mike Klein9977efa2019-07-15 12:22:36 -050015template <typename Fn>
Mike Kleinfc017c72021-02-08 10:45:19 -060016static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
17 skvm::Program p = b.done();
18 test(p);
19 if (p.hasJIT()) {
20 test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
Mike Kleinb5a30762019-10-16 10:11:56 -050021 }
Mike Kleinb5a30762019-10-16 10:11:56 -050022}
23
Mike Klein7542ab52020-04-02 08:50:16 -050024DEF_TEST(SkVM_eliminate_dead_code, r) {
25 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -040026 {
Mike Klein00e43df2021-01-08 13:45:42 -060027 skvm::Ptr arg = b.varying<int>();
Mike Klein7542ab52020-04-02 08:50:16 -050028 skvm::I32 l = b.load32(arg);
29 skvm::I32 a = b.add(l, l);
30 b.add(a, b.splat(7));
31 }
Herb Derbyf20400e2020-03-18 16:11:25 -040032
Mike Klein7542ab52020-04-02 08:50:16 -050033 std::vector<skvm::Instruction> program = b.program();
34 REPORTER_ASSERT(r, program.size() == 4);
35
Mike Klein5b701e12020-04-02 10:34:24 -050036 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -050037 REPORTER_ASSERT(r, program.size() == 0);
38}
39
Mike Klein9fdadb92019-07-30 12:30:13 -050040DEF_TEST(SkVM_Pointless, r) {
41 // Let's build a program with no memory arguments.
42 // It should all be pegged as dead code, but we should be able to "run" it.
43 skvm::Builder b;
44 {
45 b.add(b.splat(5.0f),
46 b.splat(4.0f));
47 }
48
Mike Kleinfc017c72021-02-08 10:45:19 -060049 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -050050 for (int N = 0; N < 64; N++) {
51 program.eval(N);
52 }
53 });
54
Mike Kleined9b1f12020-02-06 13:02:32 -060055 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -050056 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -050057 }
58}
59
Mike Klein10fc1e62020-04-13 11:57:05 -050060DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -060061 skvm::Builder b;
62 b.store32(b.varying<int>(), b.splat(42));
63
Mike Kleinfc017c72021-02-08 10:45:19 -060064 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050065 int buf[18];
66 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -060067
Mike Klein10fc1e62020-04-13 11:57:05 -050068 p.eval(17, buf);
69 for (int i = 0; i < 17; i++) {
70 REPORTER_ASSERT(r, buf[i] == 42);
71 }
72 REPORTER_ASSERT(r, buf[17] == 47);
73 });
Mike Kleinb6149312020-02-26 13:04:23 -060074}
Mike Klein11efa182020-02-27 12:04:37 -060075
Mike Klein10fc1e62020-04-13 11:57:05 -050076DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -060077 skvm::Builder b;
78 {
79 auto src = b.varying<int>(),
80 dst = b.varying<int>();
81 b.store32(dst, b.load32(src));
82 }
83
Mike Kleinfc017c72021-02-08 10:45:19 -060084 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050085 int src[] = {1,2,3,4,5,6,7,8,9},
86 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -060087
Mike Klein10fc1e62020-04-13 11:57:05 -050088 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
89 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
90 REPORTER_ASSERT(r, dst[i] == src[i]);
91 }
92 size_t i = SK_ARRAY_COUNT(src)-1;
93 REPORTER_ASSERT(r, dst[i] == 0);
94 });
Mike Klein11efa182020-02-27 12:04:37 -060095}
Mike Kleinb6149312020-02-26 13:04:23 -060096
Mike Kleinc7c1f9c2021-02-08 10:24:52 -060097DEF_TEST(SkVM_allow_jit, r) {
98 skvm::Builder b;
99 {
100 auto src = b.varying<int>(),
101 dst = b.varying<int>();
102 b.store32(dst, b.load32(src));
103 }
104
105 if (b.done("", /*allow_jit=*/true).hasJIT()) {
106 REPORTER_ASSERT(r, !b.done("", false).hasJIT());
107 }
108}
109
Mike Klein81756e42019-06-12 11:36:28 -0500110DEF_TEST(SkVM_LoopCounts, r) {
111 // Make sure we cover all the exact N we want.
112
Mike Klein9977efa2019-07-15 12:22:36 -0500113 // buf[i] += 1
114 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -0600115 skvm::Ptr arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500116 b.store32(arg,
117 b.add(b.splat(1),
118 b.load32(arg)));
119
Mike Kleinfc017c72021-02-08 10:45:19 -0600120 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500121 int buf[64];
122 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500123 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
124 buf[i] = i;
125 }
126 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 for (int i = 0; i < N; i++) {
129 REPORTER_ASSERT(r, buf[i] == i+1);
130 }
131 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
132 REPORTER_ASSERT(r, buf[i] == i);
133 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500134 }
135 });
Mike Klein81756e42019-06-12 11:36:28 -0500136}
Mike Klein05642042019-06-18 12:16:06 -0500137
Mike Kleinb2b6a992020-01-13 16:34:30 -0600138DEF_TEST(SkVM_gather32, r) {
139 skvm::Builder b;
140 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400141 skvm::UPtr uniforms = b.uniform();
142 skvm::Ptr buf = b.varying<int>();
Mike Kleinb2b6a992020-01-13 16:34:30 -0600143 skvm::I32 x = b.load32(buf);
144 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
145 }
146
Mike Kleinfc017c72021-02-08 10:45:19 -0600147 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600148 const int img[] = {12,34,56,78, 90,98,76,54};
149
150 int buf[20];
151 for (int i = 0; i < 20; i++) {
152 buf[i] = i;
153 }
154
155 struct Uniforms {
156 const int* img;
157 } uniforms{img};
158
159 program.eval(20, &uniforms, buf);
160 int i = 0;
161 REPORTER_ASSERT(r, buf[i] == 12); i++;
162 REPORTER_ASSERT(r, buf[i] == 34); i++;
163 REPORTER_ASSERT(r, buf[i] == 56); i++;
164 REPORTER_ASSERT(r, buf[i] == 78); i++;
165 REPORTER_ASSERT(r, buf[i] == 90); i++;
166 REPORTER_ASSERT(r, buf[i] == 98); i++;
167 REPORTER_ASSERT(r, buf[i] == 76); i++;
168 REPORTER_ASSERT(r, buf[i] == 54); i++;
169
170 REPORTER_ASSERT(r, buf[i] == 12); i++;
171 REPORTER_ASSERT(r, buf[i] == 34); i++;
172 REPORTER_ASSERT(r, buf[i] == 56); i++;
173 REPORTER_ASSERT(r, buf[i] == 78); i++;
174 REPORTER_ASSERT(r, buf[i] == 90); i++;
175 REPORTER_ASSERT(r, buf[i] == 98); i++;
176 REPORTER_ASSERT(r, buf[i] == 76); i++;
177 REPORTER_ASSERT(r, buf[i] == 54); i++;
178
179 REPORTER_ASSERT(r, buf[i] == 12); i++;
180 REPORTER_ASSERT(r, buf[i] == 34); i++;
181 REPORTER_ASSERT(r, buf[i] == 56); i++;
182 REPORTER_ASSERT(r, buf[i] == 78); i++;
183 });
184}
185
Mike Klein81d52672019-07-30 11:11:09 -0500186DEF_TEST(SkVM_gathers, r) {
187 skvm::Builder b;
188 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400189 skvm::UPtr uniforms = b.uniform();
190 skvm::Ptr buf32 = b.varying<int>(),
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600191 buf16 = b.varying<uint16_t>(),
192 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500193
194 skvm::I32 x = b.load32(buf32);
195
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600196 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
197 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
198 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500199 }
200
Mike Kleinfc017c72021-02-08 10:45:19 -0600201 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500202 const int img[] = {12,34,56,78, 90,98,76,54};
203
204 constexpr int N = 20;
205 int buf32[N];
206 uint16_t buf16[N];
207 uint8_t buf8 [N];
208
209 for (int i = 0; i < 20; i++) {
210 buf32[i] = i;
211 }
212
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600213 struct Uniforms {
214 const int* img;
215 } uniforms{img};
216
217 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500218 int i = 0;
219 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
220 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
221 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
222 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
223 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
224 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
225 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
226 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
227
228 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
229 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
230 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
231 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
232 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
233 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
234 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
235 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
236
237 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
238 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
239 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
240 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
241 });
242}
243
Mike Klein21e85eb2020-04-17 13:57:13 -0500244DEF_TEST(SkVM_gathers2, r) {
245 skvm::Builder b;
246 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400247 skvm::UPtr uniforms = b.uniform();
248 skvm::Ptr buf32 = b.varying<int>(),
Mike Klein21e85eb2020-04-17 13:57:13 -0500249 buf16 = b.varying<uint16_t>(),
250 buf8 = b.varying<uint8_t>();
251
252 skvm::I32 x = b.load32(buf32);
253
254 b.store32(buf32, b.gather32(uniforms,0, x));
255 b.store16(buf16, b.gather16(uniforms,0, x));
256 b.store8 (buf8 , b.gather8 (uniforms,0, x));
257 }
258
Mike Kleinfc017c72021-02-08 10:45:19 -0600259 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein21e85eb2020-04-17 13:57:13 -0500260 uint8_t img[256];
261 for (int i = 0; i < 256; i++) {
262 img[i] = i;
263 }
264
265 int buf32[64];
266 uint16_t buf16[64];
267 uint8_t buf8 [64];
268
269 for (int i = 0; i < 64; i++) {
270 buf32[i] = (i*47)&63;
271 buf16[i] = 0;
272 buf8 [i] = 0;
273 }
274
275 struct Uniforms {
276 const uint8_t* img;
277 } uniforms{img};
278
279 program.eval(64, &uniforms, buf32, buf16, buf8);
280
281 for (int i = 0; i < 64; i++) {
282 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
283 }
284
285 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
286 REPORTER_ASSERT(r, buf16[63] == 0x2322);
287
288 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
289 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
290 });
291}
292
Mike Klein81d52672019-07-30 11:11:09 -0500293DEF_TEST(SkVM_bitops, r) {
294 skvm::Builder b;
295 {
Mike Klein00e43df2021-01-08 13:45:42 -0600296 skvm::Ptr ptr = b.varying<int>();
Mike Klein81d52672019-07-30 11:11:09 -0500297
298 skvm::I32 x = b.load32(ptr);
299
Mike Klein4067a942020-04-05 10:25:32 -0500300 x = b.bit_and (x, b.splat(0xf1)); // 0x40
301 x = b.bit_or (x, b.splat(0x80)); // 0xc0
302 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
303 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500304
305 x = b.shl(x, 28); // 0xe000'0000
306 x = b.sra(x, 28); // 0xffff'fffe
307 x = b.shr(x, 1); // 0x7fff'ffff
308
309 b.store32(ptr, x);
310 }
311
Mike Kleinfc017c72021-02-08 10:45:19 -0600312 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500313 int x = 0x42;
314 program.eval(1, &x);
315 REPORTER_ASSERT(r, x == 0x7fff'ffff);
316 });
317}
318
Mike Klein4067a942020-04-05 10:25:32 -0500319DEF_TEST(SkVM_select_is_NaN, r) {
320 skvm::Builder b;
321 {
Mike Klein00e43df2021-01-08 13:45:42 -0600322 skvm::Ptr src = b.varying<float>(),
Mike Klein4067a942020-04-05 10:25:32 -0500323 dst = b.varying<float>();
324
325 skvm::F32 x = b.loadF(src);
326 x = select(is_NaN(x), b.splat(0.0f)
327 , x);
328 b.storeF(dst, x);
329 }
330
331 std::vector<skvm::OptimizedInstruction> program = b.optimize();
332 REPORTER_ASSERT(r, program.size() == 4);
333 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
334 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
335 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
336 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
337
Mike Kleinfc017c72021-02-08 10:45:19 -0600338 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500339 // ±NaN, ±0, ±1, ±inf
340 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
341 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
342 uint32_t dst[SK_ARRAY_COUNT(src)];
343 program.eval(SK_ARRAY_COUNT(src), src, dst);
344
345 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
346 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
347 }
348 });
349}
350
Mike Klein81d52672019-07-30 11:11:09 -0500351DEF_TEST(SkVM_f32, r) {
352 skvm::Builder b;
353 {
Mike Klein00e43df2021-01-08 13:45:42 -0600354 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500355
Mike Reedf5ff4c22020-03-23 14:57:53 -0400356 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500357 y = b.add(x,x), // y = 2x
358 z = b.sub(y,x), // z = 2x-x = x
359 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400360 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500361 }
362
Mike Kleinfc017c72021-02-08 10:45:19 -0600363 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500364 float buf[] = { 1,2,3,4,5,6,7,8,9 };
365 program.eval(SK_ARRAY_COUNT(buf), buf);
366 for (float v : buf) {
367 REPORTER_ASSERT(r, v == 1.0f);
368 }
369 });
370}
371
372DEF_TEST(SkVM_cmp_i32, r) {
373 skvm::Builder b;
374 {
375 skvm::I32 x = b.load32(b.varying<int>());
376
377 auto to_bit = [&](int shift, skvm::I32 mask) {
378 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
379 };
380
381 skvm::I32 m = b.splat(0);
382 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
383 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
384 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
385 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
386 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
387 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
388
389 b.store32(b.varying<int>(), m);
390 }
Mike Kleinfc017c72021-02-08 10:45:19 -0600391 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500392 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
393 int out[SK_ARRAY_COUNT(in)];
394
395 program.eval(SK_ARRAY_COUNT(in), in, out);
396
397 REPORTER_ASSERT(r, out[0] == 0b001111);
398 REPORTER_ASSERT(r, out[1] == 0b001100);
399 REPORTER_ASSERT(r, out[2] == 0b001010);
400 REPORTER_ASSERT(r, out[3] == 0b001010);
401 REPORTER_ASSERT(r, out[4] == 0b000010);
402 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
403 REPORTER_ASSERT(r, out[i] == 0b110010);
404 }
405 });
406}
407
408DEF_TEST(SkVM_cmp_f32, r) {
409 skvm::Builder b;
410 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400411 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500412
413 auto to_bit = [&](int shift, skvm::I32 mask) {
414 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
415 };
416
417 skvm::I32 m = b.splat(0);
418 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
419 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
420 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
421 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
422 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
423 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
424
425 b.store32(b.varying<int>(), m);
426 }
427
Mike Kleinfc017c72021-02-08 10:45:19 -0600428 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500429 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
430 int out[SK_ARRAY_COUNT(in)];
431
432 program.eval(SK_ARRAY_COUNT(in), in, out);
433
434 REPORTER_ASSERT(r, out[0] == 0b001111);
435 REPORTER_ASSERT(r, out[1] == 0b001100);
436 REPORTER_ASSERT(r, out[2] == 0b001010);
437 REPORTER_ASSERT(r, out[3] == 0b001010);
438 REPORTER_ASSERT(r, out[4] == 0b000010);
439 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
440 REPORTER_ASSERT(r, out[i] == 0b110010);
441 }
442 });
443}
444
Mike Klein14548b92020-02-28 14:02:29 -0600445DEF_TEST(SkVM_index, r) {
446 skvm::Builder b;
447 b.store32(b.varying<int>(), b.index());
448
Mike Kleinfc017c72021-02-08 10:45:19 -0600449 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600450 int buf[23];
451 program.eval(SK_ARRAY_COUNT(buf), buf);
452 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
453 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
454 }
455 });
456}
457
Mike Klein4a131192019-07-19 13:56:41 -0500458DEF_TEST(SkVM_mad, r) {
459 // This program is designed to exercise the tricky corners of instruction
460 // and register selection for Op::mad_f32.
461
462 skvm::Builder b;
463 {
Mike Klein00e43df2021-01-08 13:45:42 -0600464 skvm::Ptr arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500465
Mike Kleincac130f2020-09-25 14:47:44 -0500466 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein4a131192019-07-19 13:56:41 -0500467 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
468 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
469 w = b.mad(z,z,y), // w can alias z but not y.
470 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600471 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500472 }
473
Mike Kleinfc017c72021-02-08 10:45:19 -0600474 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500475 int x = 2;
476 program.eval(1, &x);
477 // x = 2
478 // y = 2*2 + 2 = 6
479 // z = 6*6 + 2 = 38
480 // w = 38*38 + 6 = 1450
481 // v = 1450*6 + 1450 = 10150
482 REPORTER_ASSERT(r, x == 10150);
483 });
484}
485
Mike Klein7c0332c2020-03-05 14:18:04 -0600486DEF_TEST(SkVM_fms, r) {
487 // Create a pattern that can be peepholed into an Op::fms_f32.
488 skvm::Builder b;
489 {
Mike Klein00e43df2021-01-08 13:45:42 -0600490 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600491
Mike Kleincac130f2020-09-25 14:47:44 -0500492 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600493 v = b.sub(b.mul(x, b.splat(2.0f)),
494 b.splat(1.0f));
495 b.store32(arg, b.trunc(v));
496 }
497
Mike Kleinfc017c72021-02-08 10:45:19 -0600498 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600499 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
500 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
501
502 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
503 REPORTER_ASSERT(r, buf[i] = 2*i-1);
504 }
505 });
506}
507
508DEF_TEST(SkVM_fnma, r) {
509 // Create a pattern that can be peepholed into an Op::fnma_f32.
510 skvm::Builder b;
511 {
Mike Klein00e43df2021-01-08 13:45:42 -0600512 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600513
Mike Kleincac130f2020-09-25 14:47:44 -0500514 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600515 v = b.sub(b.splat(1.0f),
516 b.mul(x, b.splat(2.0f)));
517 b.store32(arg, b.trunc(v));
518 }
519
Mike Kleinfc017c72021-02-08 10:45:19 -0600520 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600521 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
522 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
523
524 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
525 REPORTER_ASSERT(r, buf[i] = 1-2*i);
526 }
527 });
528}
529
Mike Klein81d52672019-07-30 11:11:09 -0500530DEF_TEST(SkVM_madder, r) {
531 skvm::Builder b;
532 {
Mike Klein00e43df2021-01-08 13:45:42 -0600533 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500534
Mike Reedf5ff4c22020-03-23 14:57:53 -0400535 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500536 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
537 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
538 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400539 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500540 }
541
Mike Kleinfc017c72021-02-08 10:45:19 -0600542 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500543 float x = 2.0f;
544 // y = 2*2 + 2 = 6
545 // z = 6*2 + 6 = 18
546 // w = 6*6 + 18 = 54
547 program.eval(1, &x);
548 REPORTER_ASSERT(r, x == 54.0f);
549 });
550}
551
Mike Kleinf22faaf2020-01-09 07:27:39 -0600552DEF_TEST(SkVM_floor, r) {
553 skvm::Builder b;
554 {
Mike Klein00e43df2021-01-08 13:45:42 -0600555 skvm::Ptr arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400556 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600557 }
558
Mike Kleinfc017c72021-02-08 10:45:19 -0600559 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600560 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
561 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
562 program.eval(SK_ARRAY_COUNT(buf), buf);
563 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
564 REPORTER_ASSERT(r, buf[i] == want[i]);
565 }
566 });
567}
568
Mike Klein5caf7de2020-03-12 11:05:46 -0500569DEF_TEST(SkVM_round, r) {
570 skvm::Builder b;
571 {
Mike Klein00e43df2021-01-08 13:45:42 -0600572 skvm::Ptr src = b.varying<float>();
573 skvm::Ptr dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400574 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500575 }
576
577 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
578 // We haven't explicitly guaranteed that here... it just probably is.
Mike Kleinfc017c72021-02-08 10:45:19 -0600579 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500580 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
581 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
582 int dst[SK_ARRAY_COUNT(buf)];
583
584 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
585 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
586 REPORTER_ASSERT(r, dst[i] == want[i]);
587 }
588 });
589}
590
Herb Derbyc02a41f2020-02-28 14:25:45 -0600591DEF_TEST(SkVM_min, r) {
592 skvm::Builder b;
593 {
Mike Klein00e43df2021-01-08 13:45:42 -0600594 skvm::Ptr src1 = b.varying<float>();
595 skvm::Ptr src2 = b.varying<float>();
596 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600597
Mike Reedf5ff4c22020-03-23 14:57:53 -0400598 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600599 }
600
Mike Kleinfc017c72021-02-08 10:45:19 -0600601 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600602 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
603 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
604 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
605 float d[SK_ARRAY_COUNT(s1)];
606 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
607 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
608 REPORTER_ASSERT(r, d[i] == want[i]);
609 }
610 });
611}
612
613DEF_TEST(SkVM_max, r) {
614 skvm::Builder b;
615 {
Mike Klein00e43df2021-01-08 13:45:42 -0600616 skvm::Ptr src1 = b.varying<float>();
617 skvm::Ptr src2 = b.varying<float>();
618 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600619
Mike Reedf5ff4c22020-03-23 14:57:53 -0400620 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600621 }
622
Mike Kleinfc017c72021-02-08 10:45:19 -0600623 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600624 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
625 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
626 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
627 float d[SK_ARRAY_COUNT(s1)];
628 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
629 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
630 REPORTER_ASSERT(r, d[i] == want[i]);
631 }
632 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600633}
634
Mike Kleinf98d0d32019-07-22 14:30:18 -0500635DEF_TEST(SkVM_hoist, r) {
636 // This program uses enough constants that it will fail to JIT if we hoist them.
637 // The JIT will try again without hoisting, and that'll just need 2 registers.
638 skvm::Builder b;
639 {
Mike Klein00e43df2021-01-08 13:45:42 -0600640 skvm::Ptr arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500641 skvm::I32 x = b.load32(arg);
642 for (int i = 0; i < 32; i++) {
643 x = b.add(x, b.splat(i));
644 }
645 b.store32(arg, x);
646 }
647
Mike Kleinfc017c72021-02-08 10:45:19 -0600648 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500649 int x = 4;
650 program.eval(1, &x);
651 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
652 // x += 496
653 REPORTER_ASSERT(r, x == 500);
654 });
655}
656
Mike Kleinb9944122019-08-02 12:22:39 -0500657DEF_TEST(SkVM_select, r) {
658 skvm::Builder b;
659 {
Mike Klein00e43df2021-01-08 13:45:42 -0600660 skvm::Ptr buf = b.varying<int>();
Mike Kleinb9944122019-08-02 12:22:39 -0500661
662 skvm::I32 x = b.load32(buf);
663
664 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
665
666 b.store32(buf, x);
667 }
668
Mike Kleinfc017c72021-02-08 10:45:19 -0600669 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500670 int buf[] = { 0,1,2,3,4,5,6,7,8 };
671 program.eval(SK_ARRAY_COUNT(buf), buf);
672 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
673 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
674 }
675 });
676}
677
Mike Kleinf471c822021-01-05 13:31:15 -0600678DEF_TEST(SkVM_swap, r) {
679 skvm::Builder b;
680 {
681 // This program is the equivalent of
682 // x = *X
683 // y = *Y
684 // *X = y
685 // *Y = x
686 // One rescheduling of the program based only on data flow of Op arguments is
687 // x = *X
688 // *Y = x
689 // y = *Y
690 // *X = y
691 // but this reordering does not produce the same results and is invalid.
Mike Klein00e43df2021-01-08 13:45:42 -0600692 skvm::Ptr X = b.varying<int>(),
Mike Kleinf471c822021-01-05 13:31:15 -0600693 Y = b.varying<int>();
694
695 skvm::I32 x = b.load32(X),
696 y = b.load32(Y);
697
698 b.store32(X, y);
699 b.store32(Y, x);
700 }
701
Mike Kleinfc017c72021-02-08 10:45:19 -0600702 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf471c822021-01-05 13:31:15 -0600703 int b1[] = { 0,1,2,3 };
704 int b2[] = { 4,5,6,7 };
705 program.eval(SK_ARRAY_COUNT(b1), b1, b2);
706 for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
707 REPORTER_ASSERT(r, b1[i] == 4 + i);
708 REPORTER_ASSERT(r, b2[i] == i);
709 }
710 });
711}
712
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500713DEF_TEST(SkVM_NewOps, r) {
714 // Exercise a somewhat arbitrary set of new ops.
715 skvm::Builder b;
716 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -0400717 skvm::Ptr buf = b.varying<int16_t>();
718 skvm::UPtr uniforms = b.uniform();
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500719
720 skvm::I32 x = b.load16(buf);
721
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600722 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500723
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600724 x = b.add(x, b.uniform32(uniforms, kPtr+0));
Mike Klein8b16bee2020-11-25 10:54:02 -0600725 x = b.mul(x, b.uniform32(uniforms, kPtr+4));
726 x = b.sub(x, b.uniform32(uniforms, kPtr+8));
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600727
Mike Klein8b16bee2020-11-25 10:54:02 -0600728 skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500729 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
730 x = b.select(b.gt(x, limit ), limit , x);
731
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600732 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500733
734 b.store16(buf, x);
735 }
736
Mike Kleinfc017c72021-02-08 10:45:19 -0600737 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500738 const int N = 31;
739 int16_t buf[N];
740 for (int i = 0; i < N; i++) {
741 buf[i] = i;
742 }
743
744 const int M = 16;
745 uint8_t img[M];
746 for (int i = 0; i < M; i++) {
747 img[i] = i*i;
748 }
749
750 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600751 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500752 int add = 5;
Mike Klein8b16bee2020-11-25 10:54:02 -0600753 int mul = 3;
754 int sub = 18;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500755 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600756 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500757
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600758 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500759
760 for (int i = 0; i < N; i++) {
761 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
762 int x = 3*(i-1);
763
764 // Then that's pinned to the limits of img.
765 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
766 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
767 REPORTER_ASSERT(r, buf[i] == img[x]);
768 }
769 });
770}
771
Herb Derbya37001e2021-07-22 17:34:21 -0400772DEF_TEST(SKVM_array32, r) {
Herb Derbya4953512021-07-23 11:08:25 -0400773
774
775
Herb Derbya37001e2021-07-22 17:34:21 -0400776 skvm::Builder b;
Herb Derbya4953512021-07-23 11:08:25 -0400777 skvm::Uniforms uniforms(b.uniform(), 0);
778 // Take up the first slot, so other uniforms are not at 0 offset.
779 uniforms.push(0);
780 int i[] = {3, 7};
781 skvm::Uniform array = uniforms.pushArray(i);
782 float f[] = {5, 9};
783 skvm::Uniform arrayF = uniforms.pushArrayF(f);
Herb Derbya37001e2021-07-22 17:34:21 -0400784 {
785 skvm::Ptr buf0 = b.varying<int32_t>(),
786 buf1 = b.varying<int32_t>(),
Herb Derbya4953512021-07-23 11:08:25 -0400787 buf2 = b.varying<int32_t>();
Herb Derbya37001e2021-07-22 17:34:21 -0400788
Herb Derbya4953512021-07-23 11:08:25 -0400789 skvm::I32 j = b.array32(array, 0);
790 b.store32(buf0, j);
791 skvm::I32 k = b.array32(array, 1);
792 b.store32(buf1, k);
793
794 skvm::F32 x = b.arrayF(arrayF, 0);
795 skvm::F32 y = b.arrayF(arrayF, 1);
796 b.store32(buf2, b.trunc(b.add(x, y)));
Herb Derbya37001e2021-07-22 17:34:21 -0400797 }
798
799 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbya4953512021-07-23 11:08:25 -0400800 const int K = 10;
801 int32_t buf0[K],
802 buf1[K],
803 buf2[K];
Herb Derbya37001e2021-07-22 17:34:21 -0400804
Herb Derbya4953512021-07-23 11:08:25 -0400805 // reset the i[0] for the two tests.
806 i[0] = 3;
807 f[1] = 9;
808 program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
Herb Derbya37001e2021-07-22 17:34:21 -0400809 for (auto v : buf0) {
810 REPORTER_ASSERT(r, v == 3);
811 }
812 for (auto v : buf1) {
813 REPORTER_ASSERT(r, v == 7);
814 }
Herb Derbya4953512021-07-23 11:08:25 -0400815 for (auto v : buf2) {
816 REPORTER_ASSERT(r, v == 14);
817 }
Herb Derbya37001e2021-07-22 17:34:21 -0400818 i[0] = 4;
Herb Derbya4953512021-07-23 11:08:25 -0400819 f[1] = 10;
820 program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
Herb Derbya37001e2021-07-22 17:34:21 -0400821 for (auto v : buf0) {
822 REPORTER_ASSERT(r, v == 4);
823 }
824 for (auto v : buf1) {
825 REPORTER_ASSERT(r, v == 7);
826 }
Herb Derbya4953512021-07-23 11:08:25 -0400827 for (auto v : buf2) {
828 REPORTER_ASSERT(r, v == 15);
829 }
Herb Derbya37001e2021-07-22 17:34:21 -0400830 });
831}
832
Mike Klein5a8404c2020-02-28 14:24:56 -0600833DEF_TEST(SkVM_sqrt, r) {
834 skvm::Builder b;
835 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400836 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600837
Mike Kleinfc017c72021-02-08 10:45:19 -0600838 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600839 constexpr int K = 17;
840 float buf[K];
841 for (int i = 0; i < K; i++) {
842 buf[i] = (float)(i*i);
843 }
844
845 // x^2 -> x
846 program.eval(K, buf);
847
848 for (int i = 0; i < K; i++) {
849 REPORTER_ASSERT(r, buf[i] == (float)i);
850 }
851 });
852}
853
Mike Klein3f7c8652019-11-07 10:33:56 -0600854DEF_TEST(SkVM_MSAN, r) {
855 // This little memset32() program should be able to JIT, but if we run that
856 // JIT code in an MSAN build, it won't see the writes initialize buf. So
857 // this tests that we're using the interpreter instead.
858 skvm::Builder b;
859 b.store32(b.varying<int>(), b.splat(42));
860
Mike Kleinfc017c72021-02-08 10:45:19 -0600861 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600862 constexpr int K = 17;
863 int buf[K]; // Intentionally uninitialized.
864 program.eval(K, buf);
865 sk_msan_assert_initialized(buf, buf+K);
866 for (int x : buf) {
867 REPORTER_ASSERT(r, x == 42);
868 }
869 });
870}
871
Mike Klein13601172019-11-08 15:01:02 -0600872DEF_TEST(SkVM_assert, r) {
873 skvm::Builder b;
874 b.assert_true(b.lt(b.load32(b.varying<int>()),
875 b.splat(42)));
876
Mike Kleinfc017c72021-02-08 10:45:19 -0600877 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600878 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600879 program.eval(SK_ARRAY_COUNT(buf), buf);
880 });
881}
882
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600883DEF_TEST(SkVM_premul, reporter) {
884 // Test that premul is short-circuited when alpha is known opaque.
885 {
886 skvm::Builder p;
887 auto rptr = p.varying<int>(),
888 aptr = p.varying<int>();
889
Mike Reedf5ff4c22020-03-23 14:57:53 -0400890 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600891 g = p.splat(0.0f),
892 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400893 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600894
895 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400896 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600897
898 // load red, load alpha, red *= alpha, store red
899 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
900 }
901
902 {
903 skvm::Builder p;
904 auto rptr = p.varying<int>();
905
Mike Reedf5ff4c22020-03-23 14:57:53 -0400906 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600907 g = p.splat(0.0f),
908 b = p.splat(0.0f),
909 a = p.splat(1.0f);
910
911 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400912 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600913
914 // load red, store red
915 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
916 }
917
918 // Same deal for unpremul.
919 {
920 skvm::Builder p;
921 auto rptr = p.varying<int>(),
922 aptr = p.varying<int>();
923
Mike Reedf5ff4c22020-03-23 14:57:53 -0400924 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600925 g = p.splat(0.0f),
926 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400927 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600928
929 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400930 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600931
932 // load red, load alpha, a bunch of unpremul instructions, store red
933 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
934 }
935
936 {
937 skvm::Builder p;
938 auto rptr = p.varying<int>();
939
Mike Reedf5ff4c22020-03-23 14:57:53 -0400940 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600941 g = p.splat(0.0f),
942 b = p.splat(0.0f),
943 a = p.splat(1.0f);
944
945 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400946 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600947
948 // load red, store red
949 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
950 }
951}
Mike Klein05642042019-06-18 12:16:06 -0500952
Mike Klein05642042019-06-18 12:16:06 -0500953template <typename Fn>
954static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400955 uint8_t buf[4096];
956 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500957 fn(a);
958
959 REPORTER_ASSERT(r, a.size() == expected.size());
960
Mike Klein88c0a902019-06-24 15:34:02 -0400961 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500962 want = expected.begin();
963 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500964 REPORTER_ASSERT(r, got[i] == want[i],
965 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500966 }
967}
968
969DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500970 // Easiest way to generate test cases is
971 //
972 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
973 //
974 // The -x86-asm-syntax=intel bit is optional, controlling the
975 // input syntax only; the output will always be AT&T op x,y,dst style.
976 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
977 // that a bit easier to use here, despite maybe favoring AT&T overall.
978
979 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500980 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500981 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -0600982 a.int3();
Mike Klein05642042019-06-18 12:16:06 -0500983 a.vzeroupper();
984 a.ret();
985 },{
Mike Kleinee5864a2019-11-11 09:16:44 -0600986 0xcc,
Mike Klein05642042019-06-18 12:16:06 -0500987 0xc5, 0xf8, 0x77,
988 0xc3,
989 });
990
Mike Klein237dbb42019-07-19 09:44:47 -0500991 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -0500992 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500993 a.ret();
994 a.align(4);
995 },{
996 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -0500997 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -0500998 });
Mike Klein61703a62019-06-18 15:01:12 -0500999
Mike Klein397fc882019-06-20 11:37:10 -05001000 test_asm(r, [&](A& a) {
1001 a.add(A::rax, 8); // Always good to test rax.
1002 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001003
Mike Klein397fc882019-06-20 11:37:10 -05001004 a.add(A::rdi, 12); // Last 0x48 REX
1005 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001006
Mike Klein86a645c2019-07-12 12:29:39 -05001007 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001008 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001009
Mike Klein397fc882019-06-20 11:37:10 -05001010 a.add(A::rsi, 128); // Requires 4 byte immediate.
1011 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001012
1013 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1014 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1015 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
Mike Klein68d075e2020-07-28 09:26:51 -05001016 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
Mike Kleinc15c9362020-04-16 11:10:36 -05001017 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
Mike Klein68d075e2020-07-28 09:26:51 -05001018 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
1019 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
Mike Kleinc15c9362020-04-16 11:10:36 -05001020 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1021 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1022 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1023
1024 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1025
1026 a.add( A::rax , A::rcx); // addq %rcx, %rax
1027 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1028 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1029 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1030
1031 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001032 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001033 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001034 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001035
1036 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001037 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001038
Mike Klein86a645c2019-07-12 12:29:39 -05001039 0x49, 0x83, 0b11'000'000, 0x07,
1040 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001041
1042 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001043 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001044
1045 0x48,0x83,0x06,0x07,
1046 0x48,0x83,0x46,0x0c,0x07,
1047 0x48,0x83,0x44,0x24,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001048 0x49,0x83,0x44,0x24,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001049 0x48,0x83,0x44,0x84,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001050 0x49,0x83,0x44,0x84,0x0c,0x07,
1051 0x4a,0x83,0x44,0xa0,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001052 0x4b,0x83,0x44,0x43,0x0c,0x07,
1053 0x49,0x83,0x44,0x03,0x0c,0x07,
1054 0x4a,0x83,0x44,0x18,0x0c,0x07,
1055
1056 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1057
1058 0x48,0x01,0xc8,
1059 0x48,0x01,0x08,
1060 0x48,0x01,0x48,0x0c,
1061 0x48,0x03,0x48,0x0c,
1062 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001063 });
Mike Klein397fc882019-06-20 11:37:10 -05001064
1065
1066 test_asm(r, [&](A& a) {
1067 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1068 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1069 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1070 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1071 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1072 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1073 },{
1074 /* VEX */ /*op*/ /*modRM*/
1075 0xc5, 0xf5, 0xfe, 0xc2,
1076 0xc5, 0x75, 0xfe, 0xc2,
1077 0xc5, 0xbd, 0xfe, 0xc2,
1078 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1079 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1080 0xc5, 0xf5, 0xfa, 0xc2,
1081 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001082
1083 test_asm(r, [&](A& a) {
Mike Klein84dd8f92020-09-15 07:57:27 -05001084 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1085 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1086 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1087 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1088
1089 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1090 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1091 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1092 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1093
1094 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1095 a.vpabsw (A::ymm4, A::ymm3);
1096 a.vpsllw (A::ymm4, A::ymm3, 12);
1097 a.vpsraw (A::ymm4, A::ymm3, 12);
1098 },{
1099 0xc5, 0xe5, 0xfd, 0xe2,
1100 0xc5, 0xe5, 0xe3, 0xe2,
1101 0xc5, 0xe5, 0x75, 0xe2,
1102 0xc5, 0xe5, 0x65, 0xe2,
1103
1104 0xc5, 0xe5, 0xea, 0xe2,
1105 0xc5, 0xe5, 0xee, 0xe2,
1106 0xc4,0xe2,0x65, 0x3a, 0xe2,
1107 0xc4,0xe2,0x65, 0x3e, 0xe2,
1108
1109 0xc4,0xe2,0x65, 0x0b, 0xe2,
1110 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1111 0xc5,0xdd,0x71, 0xf3, 0x0c,
1112 0xc5,0xdd,0x71, 0xe3, 0x0c,
1113 });
1114
1115 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001116 A::Label l;
1117 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001118 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1119 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1120 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1121 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1122 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1123 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001124 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001125 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001126 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001127 0xc5,0xf5,0x76,0xc2,
1128 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001129 0xc5,0xf4,0xc2,0xc2,0x00,
1130 0xc5,0xf4,0xc2,0xc2,0x01,
1131 0xc5,0xf4,0xc2,0xc2,0x02,
1132 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001133 });
1134
1135 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001136 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1137 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1138 },{
1139 0xc5,0xf4,0x5d,0xc2,
1140 0xc5,0xf4,0x5f,0xc2,
1141 });
1142
1143 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001144 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1145 },{
1146 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1147 });
1148
1149 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001150 a.vpsrld(A::ymm15, A::ymm2, 8);
1151 a.vpsrld(A::ymm0 , A::ymm8, 5);
1152 },{
1153 0xc5, 0x85, 0x72,0xd2, 0x08,
1154 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1155 });
1156
1157 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001158 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001159 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001160 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001161 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001162 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001163 },{
Mike Klein184f6012020-07-22 13:17:29 -05001164 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001165 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001166 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1167 });
Mike Kleine5053412019-06-21 12:37:22 -05001168
1169 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001170 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1171 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1172 },{
1173 0xc5,0xed,0x62,0x0f,
1174 0xc5,0xed,0x6a,0xcb,
1175 });
1176
1177 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001178 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1179 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1180 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1181 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1182 },{
1183 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1184 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1185 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1186 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1187 });
1188
1189 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001190 A::Label l;
1191 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001192 a.byte(1);
1193 a.byte(2);
1194 a.byte(3);
1195 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001196
Mike Klein65c10b52019-07-12 09:22:21 -05001197 a.vbroadcastss(A::ymm0 , &l);
1198 a.vbroadcastss(A::ymm1 , &l);
1199 a.vbroadcastss(A::ymm8 , &l);
1200 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001201
Mike Klein65c10b52019-07-12 09:22:21 -05001202 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001203 a.vpaddd (A::ymm4, A::ymm3, &l);
1204 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001205
1206 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001207
1208 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001209 },{
1210 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001211
Mike Kleine5053412019-06-21 12:37:22 -05001212 /* VEX */ /*op*/ /* ModRM */ /* offset */
1213 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1214 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1215 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1216 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001217
1218 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001219
1220 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1221 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001222
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001223 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1224
1225 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001226 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001227
1228 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001229 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1230 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1231 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1232 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001233
1234 a.vbroadcastss(A::ymm8, A::xmm0);
1235 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001236 },{
1237 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1238 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1239 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1240 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1241 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001242
1243 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1244 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001245 });
1246
1247 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001248 A::Label l;
1249 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001250 a.jne(&l);
1251 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001252 a.je (&l);
1253 a.jmp(&l);
1254 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001255 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001256
Mike Kleinc15c9362020-04-16 11:10:36 -05001257 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001258 a.cmp(A::rax, 12);
1259 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001260 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001261 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1262 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1263 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1264 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1265 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001266 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001267
Mike Kleinc15c9362020-04-16 11:10:36 -05001268 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001269 0x48,0x83,0xf8,0x0c,
1270 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001271 });
Mike Klein120d9e82019-06-21 15:52:55 -05001272
1273 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001274 a.vmovups(A::ymm5, A::Mem{A::rsi});
1275 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001276
Mike Klein400ba222020-06-30 15:54:19 -05001277 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001278 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001279
Mike Kleinedc2dac2020-04-15 16:18:27 -05001280 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1281 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001282
Mike Klein8390f2e2020-04-15 17:03:08 -05001283 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001284 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001285 /* VEX */ /*Op*/ /* ModRM */
1286 0xc5, 0xfc, 0x10, 0b00'101'110,
1287 0xc5, 0xfc, 0x11, 0b00'101'110,
1288
Mike Klein400ba222020-06-30 15:54:19 -05001289 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001290 0xc5, 0xf8, 0x11, 0b00'101'110,
1291
Mike Klein52010b72019-08-02 11:18:00 -05001292 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001293 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001294
1295 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001296 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001297
1298 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001299 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1300 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1301 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001302
Mike Kleinedc2dac2020-04-15 16:18:27 -05001303 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1304 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1305 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001306 },{
1307 0xc5,0xfc,0x10,0x2c,0x24,
1308 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1309 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1310
1311 0xc5,0xfc,0x11,0x2c,0x24,
1312 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1313 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1314 });
1315
1316 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001317 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1318 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1319 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1320 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1321 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001322
Mike Kleinc15c9362020-04-16 11:10:36 -05001323 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1324 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1325 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1326 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1327 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001328
Mike Klein8390f2e2020-04-15 17:03:08 -05001329 a.vmovd(A::Mem{A::rax}, A::xmm0);
1330 a.vmovd(A::Mem{A::rax}, A::xmm8);
1331 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1332
1333 a.vmovd(A::xmm0, A::Mem{A::rax});
1334 a.vmovd(A::xmm8, A::Mem{A::rax});
1335 a.vmovd(A::xmm0, A::Mem{A::r8 });
1336
1337 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1338 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1339 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1340
Mike Klein35b97c32019-07-12 12:32:45 -05001341 a.vmovd(A::rax, A::xmm0);
1342 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001343 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001344
1345 a.vmovd(A::xmm0, A::rax);
1346 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001347 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001348
Mike Kleinc15c9362020-04-16 11:10:36 -05001349 a.movb(A::Mem{A::rdx}, A::rax);
1350 a.movb(A::Mem{A::rdx}, A::r8 );
1351 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001352
Mike Kleinc15c9362020-04-16 11:10:36 -05001353 a.movb(A::rdx, A::Mem{A::rax});
1354 a.movb(A::rdx, A::Mem{A::r8 });
1355 a.movb(A::r8 , A::Mem{A::rax});
1356
1357 a.movb(A::rdx, 12);
1358 a.movb(A::rax, 4);
1359 a.movb(A::r8 , -1);
1360
1361 a.movb(A::Mem{A::rdx}, 12);
1362 a.movb(A::Mem{A::rax}, 4);
1363 a.movb(A::Mem{A::r8 }, -1);
1364 },{
1365 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1366 0x49,0x0f,0xb6,0x00,
1367 0x4c,0x0f,0xb6,0x06,
1368 0x4c,0x0f,0xb6,0x46, 12,
1369 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1370
1371 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1372 0x49,0x0f,0xb7,0x00,
1373 0x4c,0x0f,0xb7,0x06,
1374 0x4c,0x0f,0xb7,0x46, 12,
1375 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001376
Mike Klein35b97c32019-07-12 12:32:45 -05001377 0xc5,0xf9,0x7e,0x00,
1378 0xc5,0x79,0x7e,0x00,
1379 0xc4,0xc1,0x79,0x7e,0x00,
1380
1381 0xc5,0xf9,0x6e,0x00,
1382 0xc5,0x79,0x6e,0x00,
1383 0xc4,0xc1,0x79,0x6e,0x00,
1384
Mike Klein93d3fab2020-01-14 10:46:44 -06001385 0xc5,0xf9,0x6e,0x04,0x88,
1386 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1387 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1388
Mike Klein35b97c32019-07-12 12:32:45 -05001389 0xc5,0xf9,0x7e,0xc0,
1390 0xc5,0x79,0x7e,0xc0,
1391 0xc4,0xc1,0x79,0x7e,0xc0,
1392
1393 0xc5,0xf9,0x6e,0xc0,
1394 0xc5,0x79,0x6e,0xc0,
1395 0xc4,0xc1,0x79,0x6e,0xc0,
1396
Mike Kleinc15c9362020-04-16 11:10:36 -05001397 0x48 ,0x88, 0x02,
1398 0x4c, 0x88, 0x02,
1399 0x49, 0x88, 0x00,
1400
1401 0x48 ,0x8a, 0x10,
1402 0x49, 0x8a, 0x10,
1403 0x4c, 0x8a, 0x00,
1404
1405 0x48, 0xc6, 0xc2, 0x0c,
1406 0x48, 0xc6, 0xc0, 0x04,
1407 0x49, 0xc6, 0xc0, 0xff,
1408
1409 0x48, 0xc6, 0x02, 0x0c,
1410 0x48, 0xc6, 0x00, 0x04,
1411 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001412 });
1413
1414 test_asm(r, [&](A& a) {
Mike Klein4ecc9702020-07-30 10:03:10 -05001415 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1416 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8;
1417
Mike Klein8390f2e2020-04-15 17:03:08 -05001418 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1419 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001420
Mike Klein8390f2e2020-04-15 17:03:08 -05001421 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
Mike Klein4ecc9702020-07-30 10:03:10 -05001422 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001423
Mike Klein21e85eb2020-04-17 13:57:13 -05001424 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1425 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1426
1427 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1428 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1429
Mike Klein8390f2e2020-04-15 17:03:08 -05001430 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1431 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001432
Mike Klein8390f2e2020-04-15 17:03:08 -05001433 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1434 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001435 },{
Mike Klein4ecc9702020-07-30 10:03:10 -05001436 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1437 0xc4,0x43,0x71, 0x22, 0x00, 3,
1438
Mike Klein52010b72019-08-02 11:18:00 -05001439 0xc5,0xb9, 0xc4, 0x0e, 4,
1440 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1441
Mike Klein35b97c32019-07-12 12:32:45 -05001442 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1443 0xc4,0x43,0x71, 0x20, 0x00, 12,
1444
Mike Klein21e85eb2020-04-17 13:57:13 -05001445 0xc4,0x63,0x7d,0x39,0xc1, 1,
1446 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1447
1448 0xc4,0x63,0x79,0x16,0x06, 3,
1449 0xc4,0xc3,0x79,0x16,0x08, 2,
1450
Mike Klein95529e82019-08-02 11:43:43 -05001451 0xc4,0x63,0x79, 0x15, 0x06, 7,
1452 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1453
Mike Klein35b97c32019-07-12 12:32:45 -05001454 0xc4,0x63,0x79, 0x14, 0x06, 7,
1455 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1456 });
1457
1458 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001459 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1460 },{
1461 0xc5, 0x9d, 0xdf, 0xda,
1462 });
Mike Klein9f4df802019-06-24 18:47:16 -04001463
Mike Kleind4546d62019-07-30 12:15:40 -05001464 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001465 A::Label l;
1466 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1467
1468 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1469 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1470 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1471
1472 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1473 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1474
1475 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1476 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1477 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1478 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1479 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1480
1481 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1482 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1483 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1484
Mike Kleind4546d62019-07-30 12:15:40 -05001485 a.vcvttps2dq(A::ymm3, A::ymm2);
1486 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001487 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001488 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001489 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001490 },{
1491 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001492
1493 0xc5,0xfd,0x6f,0x1e,
1494 0xc5,0xfd,0x6f,0x1c,0x24,
1495 0xc4,0xc1,0x7d,0x6f,0x1b,
1496
1497 0xc5,0xfd,0x6f,0x5e,0x04,
1498 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1499
1500 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1501 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1502 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1503 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1504 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1505
1506 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1507 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1508
1509 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1510
Mike Kleind4546d62019-07-30 12:15:40 -05001511 0xc5,0xfe,0x5b,0xda,
1512 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001513 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001514 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001515 });
1516
Mike Kleinbeaa1082020-01-13 14:04:18 -06001517 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001518 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1519 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1520
1521 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1522 a.vcvtph2ps(A::ymm2, A::xmm3);
1523 },{
1524 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1525 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1526
1527 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1528 0xc4,0xe2,0x7d,0x13,0xd3,
1529 });
1530
1531 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001532 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1533 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1534 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1535 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1536 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1537 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1538 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1539 },{
1540 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1541 0xc4,0xe2,0x75,0x92,0x04,0x10,
1542 0xc4,0x62,0x75,0x92,0x14,0x10,
1543 0xc4,0xa2,0x75,0x92,0x04,0x20,
1544 0xc4,0xc2,0x75,0x92,0x04,0x11,
1545 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1546 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1547 });
1548
Mike Kleinc322f632020-01-13 16:18:58 -06001549 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001550 a.mov(A::rax, A::Mem{A::rdi, 0});
1551 a.mov(A::rax, A::Mem{A::rdi, 1});
1552 a.mov(A::rax, A::Mem{A::rdi, 512});
1553 a.mov(A::r15, A::Mem{A::r13, 42});
1554 a.mov(A::rax, A::Mem{A::r13, 42});
1555 a.mov(A::r15, A::Mem{A::rax, 42});
1556 a.mov(A::rax, 1);
1557 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001558 },{
1559 0x48, 0x8b, 0x07,
1560 0x48, 0x8b, 0x47, 0x01,
1561 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1562 0x4d, 0x8b, 0x7d, 0x2a,
1563 0x49, 0x8b, 0x45, 0x2a,
1564 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001565 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1566 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001567 });
1568
Mike Klein9f4df802019-06-24 18:47:16 -04001569 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1570
1571 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001572 a.and16b(A::v4, A::v3, A::v1);
1573 a.orr16b(A::v4, A::v3, A::v1);
1574 a.eor16b(A::v4, A::v3, A::v1);
1575 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001576 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001577 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001578
1579 a.add4s(A::v4, A::v3, A::v1);
1580 a.sub4s(A::v4, A::v3, A::v1);
1581 a.mul4s(A::v4, A::v3, A::v1);
1582
Mike Klein97afd2e2019-10-16 14:11:27 -05001583 a.cmeq4s(A::v4, A::v3, A::v1);
1584 a.cmgt4s(A::v4, A::v3, A::v1);
1585
Mike Klein65809142019-06-25 09:44:02 -04001586 a.sub8h(A::v4, A::v3, A::v1);
1587 a.mul8h(A::v4, A::v3, A::v1);
1588
Mike Klein9f4df802019-06-24 18:47:16 -04001589 a.fadd4s(A::v4, A::v3, A::v1);
1590 a.fsub4s(A::v4, A::v3, A::v1);
1591 a.fmul4s(A::v4, A::v3, A::v1);
1592 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001593 a.fmin4s(A::v4, A::v3, A::v1);
1594 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein8d78da92020-11-25 13:53:20 -06001595
1596 a.fneg4s (A::v4, A::v3);
1597 a.fsqrt4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001598
Mike Klein65809142019-06-25 09:44:02 -04001599 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001600 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001601
1602 a.fcmeq4s(A::v4, A::v3, A::v1);
1603 a.fcmgt4s(A::v4, A::v3, A::v1);
1604 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001605 },{
Mike Klein65809142019-06-25 09:44:02 -04001606 0x64,0x1c,0x21,0x4e,
1607 0x64,0x1c,0xa1,0x4e,
1608 0x64,0x1c,0x21,0x6e,
1609 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001610 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001611 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001612
1613 0x64,0x84,0xa1,0x4e,
1614 0x64,0x84,0xa1,0x6e,
1615 0x64,0x9c,0xa1,0x4e,
1616
Mike Klein97afd2e2019-10-16 14:11:27 -05001617 0x64,0x8c,0xa1,0x6e,
1618 0x64,0x34,0xa1,0x4e,
1619
Mike Klein65809142019-06-25 09:44:02 -04001620 0x64,0x84,0x61,0x6e,
1621 0x64,0x9c,0x61,0x4e,
1622
Mike Klein9f4df802019-06-24 18:47:16 -04001623 0x64,0xd4,0x21,0x4e,
1624 0x64,0xd4,0xa1,0x4e,
1625 0x64,0xdc,0x21,0x6e,
1626 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001627 0x64,0xf4,0xa1,0x4e,
1628 0x64,0xf4,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001629
Mike Klein7c0332c2020-03-05 14:18:04 -06001630 0x64,0xf8,0xa0,0x6e,
Mike Klein8d78da92020-11-25 13:53:20 -06001631 0x64,0xf8,0xa1,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001632
Mike Klein65809142019-06-25 09:44:02 -04001633 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001634 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001635
1636 0x64,0xe4,0x21,0x4e,
1637 0x64,0xe4,0xa1,0x6e,
1638 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001639 });
1640
1641 test_asm(r, [&](A& a) {
1642 a.shl4s(A::v4, A::v3, 0);
1643 a.shl4s(A::v4, A::v3, 1);
1644 a.shl4s(A::v4, A::v3, 8);
1645 a.shl4s(A::v4, A::v3, 16);
1646 a.shl4s(A::v4, A::v3, 31);
1647
1648 a.sshr4s(A::v4, A::v3, 1);
1649 a.sshr4s(A::v4, A::v3, 8);
1650 a.sshr4s(A::v4, A::v3, 31);
1651
1652 a.ushr4s(A::v4, A::v3, 1);
1653 a.ushr4s(A::v4, A::v3, 8);
1654 a.ushr4s(A::v4, A::v3, 31);
1655
1656 a.ushr8h(A::v4, A::v3, 1);
1657 a.ushr8h(A::v4, A::v3, 8);
1658 a.ushr8h(A::v4, A::v3, 15);
1659 },{
1660 0x64,0x54,0x20,0x4f,
1661 0x64,0x54,0x21,0x4f,
1662 0x64,0x54,0x28,0x4f,
1663 0x64,0x54,0x30,0x4f,
1664 0x64,0x54,0x3f,0x4f,
1665
1666 0x64,0x04,0x3f,0x4f,
1667 0x64,0x04,0x38,0x4f,
1668 0x64,0x04,0x21,0x4f,
1669
1670 0x64,0x04,0x3f,0x6f,
1671 0x64,0x04,0x38,0x6f,
1672 0x64,0x04,0x21,0x6f,
1673
1674 0x64,0x04,0x1f,0x6f,
1675 0x64,0x04,0x18,0x6f,
1676 0x64,0x04,0x11,0x6f,
1677 });
1678
1679 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001680 a.sli4s(A::v4, A::v3, 0);
1681 a.sli4s(A::v4, A::v3, 1);
1682 a.sli4s(A::v4, A::v3, 8);
1683 a.sli4s(A::v4, A::v3, 16);
1684 a.sli4s(A::v4, A::v3, 31);
1685 },{
1686 0x64,0x54,0x20,0x6f,
1687 0x64,0x54,0x21,0x6f,
1688 0x64,0x54,0x28,0x6f,
1689 0x64,0x54,0x30,0x6f,
1690 0x64,0x54,0x3f,0x6f,
1691 });
1692
1693 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001694 a.scvtf4s (A::v4, A::v3);
1695 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001696 a.fcvtns4s(A::v4, A::v3);
Mike Klein8d78da92020-11-25 13:53:20 -06001697 a.frintp4s(A::v4, A::v3);
1698 a.frintm4s(A::v4, A::v3);
Mike Kleinec255632020-12-03 10:25:31 -06001699 a.fcvtn (A::v4, A::v3);
1700 a.fcvtl (A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001701 },{
1702 0x64,0xd8,0x21,0x4e,
1703 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001704 0x64,0xa8,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001705 0x64,0x88,0xa1,0x4e,
1706 0x64,0x98,0x21,0x4e,
Mike Kleinec255632020-12-03 10:25:31 -06001707 0x64,0x68,0x21,0x0e,
1708 0x64,0x78,0x21,0x0e,
Mike Klein9f4df802019-06-24 18:47:16 -04001709 });
Mike Klein15a368d2019-06-26 10:21:12 -04001710
1711 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001712 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1713 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1714 a.strq(A::v1, A::sp); // str q1, [sp]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001715 a.strd(A::v0, A::sp, 6); // str d0, [sp, #48]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001716 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001717 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001718 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1719 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001720 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001721 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001722 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001723 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1724 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001725 },{
1726 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001727 0xe0,0x07,0x80,0x3d,
1728 0xe1,0x03,0x80,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001729 0xe0,0x1b,0x00,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001730 0xe0,0x1b,0x00,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001731 0xe0,0x2b,0x00,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001732 0xe0,0xbf,0x00,0x3d,
1733 0xe9,0xab,0x40,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001734 0xe9,0xbf,0x40,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001735 0xe7,0x2b,0x40,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001736 0xe7,0x07,0x40,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001737 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001738 0xff,0x83,0x00,0x91,
1739 });
1740
1741 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001742 a.brk(0);
1743 a.brk(65535);
1744
Mike Klein15a368d2019-06-26 10:21:12 -04001745 a.ret(A::x30); // Conventional ret using link register.
1746 a.ret(A::x13); // Can really return using any register if we like.
1747
1748 a.add(A::x2, A::x2, 4);
1749 a.add(A::x3, A::x2, 32);
1750
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001751 a.sub(A::x2, A::x2, 4);
1752 a.sub(A::x3, A::x2, 32);
1753
Mike Klein15a368d2019-06-26 10:21:12 -04001754 a.subs(A::x2, A::x2, 4);
1755 a.subs(A::x3, A::x2, 32);
1756
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001757 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1758 a.cmp(A::x2, 4);
1759
Mike Kleinc74db792020-05-11 11:57:12 -05001760 A::Label l;
1761 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001762 a.bne(&l);
1763 a.bne(&l);
1764 a.blt(&l);
1765 a.b(&l);
1766 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001767 a.cbz(A::x2, &l);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001768
1769 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1770 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
Mike Klein15a368d2019-06-26 10:21:12 -04001771 },{
Mike Klein37be7712019-11-13 13:19:01 -06001772 0x00,0x00,0x20,0xd4,
1773 0xe0,0xff,0x3f,0xd4,
1774
Mike Klein15a368d2019-06-26 10:21:12 -04001775 0xc0,0x03,0x5f,0xd6,
1776 0xa0,0x01,0x5f,0xd6,
1777
1778 0x42,0x10,0x00,0x91,
1779 0x43,0x80,0x00,0x91,
1780
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001781 0x42,0x10,0x00,0xd1,
1782 0x43,0x80,0x00,0xd1,
1783
Mike Klein15a368d2019-06-26 10:21:12 -04001784 0x42,0x10,0x00,0xf1,
1785 0x43,0x80,0x00,0xf1,
1786
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001787 0x5f,0x10,0x00,0xf1,
1788 0x5f,0x10,0x00,0xf1,
1789
1790 0x01,0x00,0x00,0x54, // b.ne #0
1791 0xe1,0xff,0xff,0x54, // b.ne #-4
1792 0xcb,0xff,0xff,0x54, // b.lt #-8
1793 0xae,0xff,0xff,0x54, // b.al #-12
1794 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1795 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Kleindbc19ea2020-11-18 13:32:14 -06001796
1797 0x43,0x00,0x01,0x8b,
1798 0x43,0x0c,0x81,0x8b,
Mike Klein15a368d2019-06-26 10:21:12 -04001799 });
Mike Kleine51632e2019-06-26 14:47:43 -04001800
Mike Kleince7b88c2019-07-11 14:06:40 -05001801 // Can we cbz() to a not-yet-defined label?
1802 test_asm(r, [&](A& a) {
1803 A::Label l;
1804 a.cbz(A::x2, &l);
1805 a.add(A::x3, A::x2, 32);
1806 a.label(&l);
1807 a.ret(A::x30);
1808 },{
1809 0x42,0x00,0x00,0xb4, // cbz x2, #8
1810 0x43,0x80,0x00,0x91, // add x3, x2, #32
1811 0xc0,0x03,0x5f,0xd6, // ret
1812 });
1813
1814 // If we start a label as a backward label,
1815 // can we redefine it to be a future label?
1816 // (Not sure this is useful... just want to test it works.)
1817 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001818 A::Label l1;
1819 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001820 a.add(A::x3, A::x2, 32);
1821 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1822
Mike Kleinc74db792020-05-11 11:57:12 -05001823 A::Label l2; // Start off the same...
1824 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001825 a.add(A::x3, A::x2, 32);
1826 a.cbz(A::x2, &l2); // Looks like this will go backward...
1827 a.add(A::x2, A::x2, 4);
1828 a.add(A::x3, A::x2, 32);
1829 a.label(&l2); // But no... actually forward! What a switcheroo!
1830 },{
1831 0x43,0x80,0x00,0x91, // add x3, x2, #32
1832 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1833
1834 0x43,0x80,0x00,0x91, // add x3, x2, #32
1835 0x62,0x00,0x00,0xb4, // cbz x2, #12
1836 0x42,0x10,0x00,0x91, // add x2, x2, #4
1837 0x43,0x80,0x00,0x91, // add x3, x2, #32
1838 });
1839
Mike Klein81d52672019-07-30 11:11:09 -05001840 // Loading from a label on ARM.
1841 test_asm(r, [&](A& a) {
1842 A::Label fore,aft;
1843 a.label(&fore);
1844 a.word(0x01234567);
1845 a.ldrq(A::v1, &fore);
1846 a.ldrq(A::v2, &aft);
1847 a.label(&aft);
1848 a.word(0x76543210);
1849 },{
1850 0x67,0x45,0x23,0x01,
1851 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1852 0x22,0x00,0x00,0x9c, // ldr q2, #4
1853 0x10,0x32,0x54,0x76,
1854 });
1855
Mike Kleine51632e2019-06-26 14:47:43 -04001856 test_asm(r, [&](A& a) {
1857 a.ldrq(A::v0, A::x8);
1858 a.strq(A::v0, A::x8);
1859 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001860 0x00,0x01,0xc0,0x3d,
1861 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001862 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001863
1864 test_asm(r, [&](A& a) {
Mike Klein8d78da92020-11-25 13:53:20 -06001865 a.dup4s (A::v0, A::x8);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001866 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
1867 a.ld1r8h (A::v0, A::x8);
1868 a.ld1r16b(A::v0, A::x8);
1869 },{
Mike Klein8d78da92020-11-25 13:53:20 -06001870 0x00,0x0d,0x04,0x4e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001871 0x00,0xc9,0x40,0x4d,
1872 0x00,0xc5,0x40,0x4d,
1873 0x00,0xc1,0x40,0x4d,
1874 });
1875
1876 test_asm(r, [&](A& a) {
Mike Kleindd069a92021-01-20 13:51:33 -06001877 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
1878 a.ld44s(A::v0, A::x8);
1879 a.st24s(A::v0, A::x8);
1880 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
Mike Kleinf988bb52021-01-27 12:53:34 -06001881
1882 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
1883 a.ld24s(A::v0, A::x8, 1);
1884 a.ld24s(A::v0, A::x8, 2);
1885 a.ld24s(A::v0, A::x8, 3);
1886
1887 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
1888 a.ld44s(A::v0, A::x8, 1);
1889 a.ld44s(A::v0, A::x8, 2);
1890 a.ld44s(A::v0, A::x8, 3);
Mike Kleindd069a92021-01-20 13:51:33 -06001891 },{
1892 0x00,0x89,0x40,0x4c,
1893 0x00,0x09,0x40,0x4c,
1894 0x00,0x89,0x00,0x4c,
1895 0x00,0x09,0x00,0x4c,
Mike Kleinf988bb52021-01-27 12:53:34 -06001896
1897 0x00,0x81,0x60,0x0d,
1898 0x00,0x91,0x60,0x0d,
1899 0x00,0x81,0x60,0x4d,
1900 0x00,0x91,0x60,0x4d,
1901
1902 0x00,0xa1,0x60,0x0d,
1903 0x00,0xb1,0x60,0x0d,
1904 0x00,0xa1,0x60,0x4d,
1905 0x00,0xb1,0x60,0x4d,
Mike Kleindd069a92021-01-20 13:51:33 -06001906 });
1907
1908 test_asm(r, [&](A& a) {
Mike Klein1fa149a2019-07-01 11:18:08 -05001909 a.xtns2h(A::v0, A::v0);
1910 a.xtnh2b(A::v0, A::v0);
1911 a.strs (A::v0, A::x0);
1912
1913 a.ldrs (A::v0, A::x0);
1914 a.uxtlb2h(A::v0, A::v0);
1915 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001916
1917 a.uminv4s(A::v3, A::v4);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001918 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
1919 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
1920 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
Mike Klein1fa149a2019-07-01 11:18:08 -05001921 },{
1922 0x00,0x28,0x61,0x0e,
1923 0x00,0x28,0x21,0x0e,
1924 0x00,0x00,0x00,0xbd,
1925
1926 0x00,0x00,0x40,0xbd,
1927 0x00,0xa4,0x08,0x2f,
1928 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001929
1930 0x83,0xa8,0xb1,0x6e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001931 0x83,0x3c,0x04,0x0e,
1932 0x83,0x3c,0x0c,0x0e,
1933 0x64,0x1c,0x1c,0x4e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001934 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001935
1936 test_asm(r, [&](A& a) {
1937 a.ldrb(A::v0, A::x8);
1938 a.strb(A::v0, A::x8);
1939 },{
1940 0x00,0x01,0x40,0x3d,
1941 0x00,0x01,0x00,0x3d,
1942 });
Mike Klein81d52672019-07-30 11:11:09 -05001943
1944 test_asm(r, [&](A& a) {
Mike Kleindbc19ea2020-11-18 13:32:14 -06001945 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
1946 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
1947 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
1948 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
Mike Kleina7470df2020-12-03 12:06:27 -06001949
1950 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001951 },{
1952 0x20,0x0c,0x40,0xf9,
1953 0x20,0x0c,0x40,0xb9,
1954 0x20,0x0c,0x40,0x79,
1955 0x20,0x0c,0x40,0x39,
Mike Kleina7470df2020-12-03 12:06:27 -06001956
1957 0x20,0x0c,0x00,0xb9,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001958 });
1959
1960 test_asm(r, [&](A& a) {
Mike Kleinf5097db2020-12-03 09:21:00 -06001961 a.tbl (A::v0, A::v1, A::v2);
Mike Kleinc7bca522020-12-03 10:01:29 -06001962 a.uzp14s(A::v0, A::v1, A::v2);
1963 a.uzp24s(A::v0, A::v1, A::v2);
Mike Kleinf5097db2020-12-03 09:21:00 -06001964 a.zip14s(A::v0, A::v1, A::v2);
1965 a.zip24s(A::v0, A::v1, A::v2);
Mike Klein81d52672019-07-30 11:11:09 -05001966 },{
1967 0x20,0x00,0x02,0x4e,
Mike Kleinc7bca522020-12-03 10:01:29 -06001968 0x20,0x18,0x82,0x4e,
1969 0x20,0x58,0x82,0x4e,
Mike Kleinf5097db2020-12-03 09:21:00 -06001970 0x20,0x38,0x82,0x4e,
1971 0x20,0x78,0x82,0x4e,
Mike Klein81d52672019-07-30 11:11:09 -05001972 });
Mike Klein05642042019-06-18 12:16:06 -05001973}
Mike Reedbcb46c02020-03-23 17:51:01 -04001974
1975DEF_TEST(SkVM_approx_math, r) {
1976 auto eval = [](int N, float values[], auto fn) {
1977 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06001978 skvm::Ptr inout = b.varying<float>();
Mike Reedbcb46c02020-03-23 17:51:01 -04001979
1980 b.storeF(inout, fn(&b, b.loadF(inout)));
1981
1982 b.done().eval(N, values);
1983 };
1984
1985 auto compare = [r](int N, const float values[], const float expected[]) {
1986 for (int i = 0; i < N; ++i) {
1987 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1988 }
1989 };
1990
1991 // log2
1992 {
1993 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1994 constexpr int N = SK_ARRAY_COUNT(values);
1995 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1996 return b->approx_log2(v);
1997 });
1998 const float expected[] = {-2, -1, 0, 1, 2, 3};
1999 compare(N, values, expected);
2000 }
2001
2002 // pow2
2003 {
2004 float values[] = {-2, -1, 0, 1, 2, 3};
2005 constexpr int N = SK_ARRAY_COUNT(values);
2006 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2007 return b->approx_pow2(v);
2008 });
2009 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
2010 compare(N, values, expected);
2011 }
2012
2013 // powf -- x^0.5
2014 {
2015 float bases[] = {0, 1, 4, 9, 16};
2016 constexpr int N = SK_ARRAY_COUNT(bases);
2017 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2018 return b->approx_powf(base, b->splat(0.5f));
2019 });
2020 const float expected[] = {0, 1, 2, 3, 4};
2021 compare(N, bases, expected);
2022 }
2023 // powf -- 3^x
2024 {
2025 float exps[] = {-2, -1, 0, 1, 2};
2026 constexpr int N = SK_ARRAY_COUNT(exps);
2027 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2028 return b->approx_powf(b->splat(3.0f), exp);
2029 });
2030 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
2031 compare(N, exps, expected);
2032 }
Mike Reed82ff25e2020-04-07 13:51:41 -04002033
Mike Reedd468a162020-04-11 14:14:00 -04002034 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04002035 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002036 skvm::Ptr inout = b.varying<float>();
Mike Reed82ff25e2020-04-07 13:51:41 -04002037 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04002038 float actual = arg;
2039 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04002040
Mike Reedd468a162020-04-11 14:14:00 -04002041 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04002042
2043 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04002044 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04002045 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04002046 }
Mike Reed1b84ef22020-04-13 17:56:24 -04002047 return err;
2048 };
2049
2050 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2051 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002052 skvm::Ptr in0 = b.varying<float>();
2053 skvm::Ptr in1 = b.varying<float>();
2054 skvm::Ptr out = b.varying<float>();
Mike Reed1b84ef22020-04-13 17:56:24 -04002055 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2056 float actual;
2057 b.done().eval(1, &arg0, &arg1, &actual);
2058
2059 float err = std::abs(actual - expected);
2060
2061 if (err > tolerance) {
2062 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2063 REPORTER_ASSERT(r, true);
2064 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002065 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002066 };
2067
Mike Reed801ba0d2020-04-10 12:37:36 -04002068 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002069 {
2070 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002071 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002072 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2073 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2074 return approx_sin(x);
2075 });
2076 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2077 return approx_cos(x);
2078 });
2079 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002080
2081 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2082 // so bring in the domain a little.
2083 constexpr float eps = 0.16f;
2084 float err = 0;
2085 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2086 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2087 return approx_tan(x);
2088 });
2089 // try again with some multiples of P, to check our periodicity
2090 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2091 return approx_tan(x + 3*P);
2092 });
2093 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2094 return approx_tan(x - 3*P);
2095 });
2096 }
Mike Reedd468a162020-04-11 14:14:00 -04002097 if (0) { SkDebugf("tan error %g\n", err); }
2098 }
2099
2100 // asin, acos, atan
2101 {
2102 constexpr float tol = 0.00175f;
2103 float err = 0;
2104 for (float x = -1; x <= 1; x += 1.0f/64) {
2105 err += test(x, asin(x), tol, [](skvm::F32 x) {
2106 return approx_asin(x);
2107 });
2108 test(x, acos(x), tol, [](skvm::F32 x) {
2109 return approx_acos(x);
2110 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002111 }
Mike Reedd468a162020-04-11 14:14:00 -04002112 if (0) { SkDebugf("asin error %g\n", err); }
2113
2114 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002115 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002116 err += test(x, atan(x), tol, [](skvm::F32 x) {
2117 return approx_atan(x);
2118 });
2119 }
2120 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002121
2122 for (float y = -3; y <= 3; y += 1) {
2123 for (float x = -3; x <= 3; x += 1) {
2124 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002125 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002126 });
2127 }
2128 }
2129 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002130 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002131}
Mike Klein210288f2020-04-08 11:31:07 -05002132
2133DEF_TEST(SkVM_min_max, r) {
2134 // min() and max() have subtle behavior when one argument is NaN and
2135 // the other isn't. It's not sound to blindly swap their arguments.
2136 //
2137 // All backends must behave like std::min() and std::max(), which are
2138 //
2139 // min(x,y) = y<x ? y : x
2140 // max(x,y) = x<y ? y : x
2141
2142 // ±NaN, ±0, ±1, ±inf
2143 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2144 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2145
2146 float f[8];
2147 memcpy(f, bits, sizeof(bits));
2148
2149 auto identical = [&](float x, float y) {
2150 uint32_t X,Y;
2151 memcpy(&X, &x, 4);
2152 memcpy(&Y, &y, 4);
2153 return X == Y;
2154 };
2155
2156 // Test min/max with non-constant x, non-constant y.
2157 // (Whether x and y are varying or uniform shouldn't make any difference.)
2158 {
2159 skvm::Builder b;
2160 {
Mike Klein00e43df2021-01-08 13:45:42 -06002161 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002162 mn = b.varying<float>(),
2163 mx = b.varying<float>();
2164
2165 skvm::F32 x = b.loadF(src),
2166 y = b.uniformF(b.uniform(), 0);
2167
2168 b.storeF(mn, b.min(x,y));
2169 b.storeF(mx, b.max(x,y));
2170 }
2171
Mike Kleinfc017c72021-02-08 10:45:19 -06002172 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002173 float mn[8], mx[8];
2174 for (int i = 0; i < 8; i++) {
2175 // min() and max() everything with f[i].
2176 program.eval(8, f,mn,mx, &f[i]);
2177
2178 for (int j = 0; j < 8; j++) {
2179 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2180 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2181 }
2182 }
2183 });
2184 }
2185
2186 // Test each with constant on the right.
2187 for (int i = 0; i < 8; i++) {
2188 skvm::Builder b;
2189 {
Mike Klein00e43df2021-01-08 13:45:42 -06002190 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002191 mn = b.varying<float>(),
2192 mx = b.varying<float>();
2193
2194 skvm::F32 x = b.loadF(src),
2195 y = b.splat(f[i]);
2196
2197 b.storeF(mn, b.min(x,y));
2198 b.storeF(mx, b.max(x,y));
2199 }
2200
Mike Kleinfc017c72021-02-08 10:45:19 -06002201 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002202 float mn[8], mx[8];
2203 program.eval(8, f,mn,mx);
2204 for (int j = 0; j < 8; j++) {
2205 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2206 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2207 }
2208 });
2209 }
2210
2211 // Test each with constant on the left.
2212 for (int i = 0; i < 8; i++) {
2213 skvm::Builder b;
2214 {
Mike Klein00e43df2021-01-08 13:45:42 -06002215 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002216 mn = b.varying<float>(),
2217 mx = b.varying<float>();
2218
2219 skvm::F32 x = b.splat(f[i]),
2220 y = b.loadF(src);
2221
2222 b.storeF(mn, b.min(x,y));
2223 b.storeF(mx, b.max(x,y));
2224 }
2225
Mike Kleinfc017c72021-02-08 10:45:19 -06002226 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002227 float mn[8], mx[8];
2228 program.eval(8, f,mn,mx);
2229 for (int j = 0; j < 8; j++) {
2230 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2231 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2232 }
2233 });
2234 }
2235}
Mike Klein4d680cd2020-07-15 09:58:51 -05002236
2237DEF_TEST(SkVM_halfs, r) {
2238 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2239 0xc400,0xb800,0xbc00,0xc000};
2240 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2241 -4.0f,-0.5f,-1.0f,-2.0f};
2242 {
2243 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002244 skvm::Ptr src = b.varying<uint16_t>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002245 dst = b.varying<float>();
Mike Klein42d67a62020-12-01 10:14:55 -06002246 b.storeF(dst, b.from_fp16(b.load16(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002247
Mike Kleinfc017c72021-02-08 10:45:19 -06002248 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002249 float dst[8];
2250 program.eval(8, hs, dst);
2251 for (int i = 0; i < 8; i++) {
2252 REPORTER_ASSERT(r, dst[i] == fs[i]);
2253 }
2254 });
2255 }
2256 {
2257 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002258 skvm::Ptr src = b.varying<float>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002259 dst = b.varying<uint16_t>();
Mike Klein42d67a62020-12-01 10:14:55 -06002260 b.store16(dst, b.to_fp16(b.loadF(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002261
Mike Kleinfc017c72021-02-08 10:45:19 -06002262 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002263 uint16_t dst[8];
2264 program.eval(8, fs, dst);
2265 for (int i = 0; i < 8; i++) {
2266 REPORTER_ASSERT(r, dst[i] == hs[i]);
2267 }
2268 });
2269 }
2270}
Mike Klein6732da02020-07-16 13:03:18 -05002271
2272DEF_TEST(SkVM_64bit, r) {
2273 uint32_t lo[65],
2274 hi[65];
2275 uint64_t wide[65];
2276 for (int i = 0; i < 65; i++) {
2277 lo[i] = 2*i+0;
2278 hi[i] = 2*i+1;
2279 wide[i] = ((uint64_t)lo[i] << 0)
2280 | ((uint64_t)hi[i] << 32);
2281 }
2282
2283 {
2284 skvm::Builder b;
2285 {
John Stiles68f56062021-08-03 12:31:56 -04002286 skvm::Ptr widePtr = b.varying<uint64_t>(),
2287 loPtr = b.varying<int>(),
2288 hiPtr = b.varying<int>();
2289 b.store32(loPtr, b.load64(widePtr, 0));
2290 b.store32(hiPtr, b.load64(widePtr, 1));
Mike Klein6732da02020-07-16 13:03:18 -05002291 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002292 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002293 uint32_t l[65], h[65];
2294 program.eval(65, wide,l,h);
2295 for (int i = 0; i < 65; i++) {
2296 REPORTER_ASSERT(r, l[i] == lo[i]);
2297 REPORTER_ASSERT(r, h[i] == hi[i]);
2298 }
2299 });
2300 }
2301
2302 {
2303 skvm::Builder b;
2304 {
John Stiles68f56062021-08-03 12:31:56 -04002305 skvm::Ptr widePtr = b.varying<uint64_t>(),
2306 loPtr = b.varying<int>(),
2307 hiPtr = b.varying<int>();
2308 b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr));
Mike Klein6732da02020-07-16 13:03:18 -05002309 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002310 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002311 uint64_t w[65];
2312 program.eval(65, w,lo,hi);
2313 for (int i = 0; i < 65; i++) {
2314 REPORTER_ASSERT(r, w[i] == wide[i]);
2315 }
2316 });
2317 }
2318}
Mike Kleine942b8c2020-07-21 10:17:14 -05002319
Mike Kleinb19518d2020-12-03 14:39:41 -06002320DEF_TEST(SkVM_128bit, r) {
2321 float floats[4*63];
2322 uint8_t packed[4*63];
2323
2324 for (int i = 0; i < 4*63; i++) {
2325 floats[i] = i * (1/255.0f);
2326 }
2327
Mike Klein447f3312021-02-08 09:46:59 -06002328 skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType),
2329 rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);
Mike Kleinb19518d2020-12-03 14:39:41 -06002330
2331 { // Convert RGBA F32 to RGBA 8888, testing 128-bit loads.
2332 skvm::Builder b;
2333 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002334 skvm::Ptr dst = b.varying(4),
2335 src = b.varying(16);
Mike Kleinb19518d2020-12-03 14:39:41 -06002336
2337 skvm::Color c = b.load(rgba_ffff, src);
2338 b.store(rgba_8888, dst, c);
2339 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002340 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002341 memset(packed, 0, sizeof(packed));
2342 program.eval(63, packed, floats);
2343 for (int i = 0; i < 4*63; i++) {
2344 REPORTER_ASSERT(r, packed[i] == i);
2345 }
2346 });
2347 }
2348
2349
2350 { // Convert RGBA 8888 to RGBA F32, testing 128-bit stores.
2351 skvm::Builder b;
2352 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002353 skvm::Ptr dst = b.varying(16),
2354 src = b.varying(4);
Mike Kleinb19518d2020-12-03 14:39:41 -06002355
2356 skvm::Color c = b.load(rgba_8888, src);
2357 b.store(rgba_ffff, dst, c);
2358 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002359 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002360 memset(floats, 0, sizeof(floats));
2361 program.eval(63, floats, packed);
2362 for (int i = 0; i < 4*63; i++) {
2363 REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
2364 }
2365 });
2366 }
2367
2368}
2369
Mike Kleine942b8c2020-07-21 10:17:14 -05002370DEF_TEST(SkVM_is_NaN_is_finite, r) {
2371 skvm::Builder b;
2372 {
Mike Klein00e43df2021-01-08 13:45:42 -06002373 skvm::Ptr src = b.varying<float>(),
Mike Kleine942b8c2020-07-21 10:17:14 -05002374 nan = b.varying<int>(),
2375 fin = b.varying<int>();
2376 b.store32(nan, is_NaN (b.loadF(src)));
2377 b.store32(fin, is_finite(b.loadF(src)));
2378 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002379 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleine942b8c2020-07-21 10:17:14 -05002380 // ±NaN, ±0, ±1, ±inf
2381 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2382 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2383 uint32_t nan[8], fin[8];
2384 program.eval(8, bits, nan,fin);
2385
2386 for (int i = 0; i < 8; i++) {
2387 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2388 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2389 i == 4 || i == 5) ? 0xffffffff : 0));
2390 }
2391 });
2392}
Mike Klein0cfd5032020-07-28 11:08:27 -05002393
2394DEF_TEST(SkVM_args, r) {
2395 // Test we can handle at least six arguments.
2396 skvm::Builder b;
2397 {
Mike Klein00e43df2021-01-08 13:45:42 -06002398 skvm::Ptr dst = b.varying<float>(),
Mike Klein0cfd5032020-07-28 11:08:27 -05002399 A = b.varying<float>(),
2400 B = b.varying<float>(),
2401 C = b.varying<float>(),
2402 D = b.varying<float>(),
2403 E = b.varying<float>();
2404 storeF(dst, b.loadF(A)
2405 + b.loadF(B)
2406 + b.loadF(C)
2407 + b.loadF(D)
2408 + b.loadF(E));
2409 }
2410
Mike Kleinfc017c72021-02-08 10:45:19 -06002411 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein0cfd5032020-07-28 11:08:27 -05002412 float dst[17],A[17],B[17],C[17],D[17],E[17];
2413 for (int i = 0; i < 17; i++) {
2414 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2415 }
2416 program.eval(17, dst,A,B,C,D,E);
2417 for (int i = 0; i < 17; i++) {
2418 REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2419 }
2420 });
2421}
Mike Klein9791e502020-09-15 12:43:38 -05002422
John Stiles68f56062021-08-03 12:31:56 -04002423DEF_TEST(SkVM_badpack, reporter) {
Mike Kleinee40ec62020-11-20 15:34:16 -06002424 // Test case distilled from actual failing draw,
2425 // originally with a bad arm64 implementation of pack().
2426 skvm::Builder p;
2427 {
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002428 skvm::UPtr uniforms = p.uniform();
2429 skvm::Ptr dst = p.varying<uint16_t>();
Mike Kleinee40ec62020-11-20 15:34:16 -06002430
Mike Klein5ec9c4e2020-12-01 10:43:46 -06002431 skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
Mike Kleinee40ec62020-11-20 15:34:16 -06002432 a = p.splat(0xf);
2433
2434 skvm::I32 _4444 = p.splat(0);
2435 _4444 = pack(_4444, r, 12);
2436 _4444 = pack(_4444, a, 0);
2437 store16(dst, _4444);
2438 }
2439
Mike Kleinfc017c72021-02-08 10:45:19 -06002440 test_jit_and_interpreter(p, [&](const skvm::Program& program){
Mike Kleinee40ec62020-11-20 15:34:16 -06002441 const float uniforms[] = { 0.0f, 0.0f,
2442 1.0f, 0.0f, 0.0f, 1.0f };
2443
2444 uint16_t dst[17] = {0};
2445 program.eval(17, uniforms,dst);
2446 for (int i = 0; i < 17; i++) {
John Stiles68f56062021-08-03 12:31:56 -04002447 REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
Mike Kleinee40ec62020-11-20 15:34:16 -06002448 }
2449 });
2450}
Mike Klein960bd2d2020-12-21 14:33:55 -06002451
2452DEF_TEST(SkVM_features, r) {
2453 auto build_program = [](skvm::Builder* b) {
2454 skvm::F32 x = b->loadF(b->varying<float>());
2455 b->storeF(b->varying<float>(), x*x+x);
2456 };
2457
2458 { // load-fma-store with FMA available.
2459 skvm::Features features;
2460 features.fma = true;
2461 skvm::Builder b(features);
2462 build_program(&b);
2463 REPORTER_ASSERT(r, b.optimize().size() == 3);
2464 }
2465
2466 { // load-mul-add-store without FMA.
2467 skvm::Features features;
2468 features.fma = false;
2469 skvm::Builder b(features);
2470 build_program(&b);
2471 REPORTER_ASSERT(r, b.optimize().size() == 4);
2472 }
2473
2474 { // Auto-detected, could be either.
2475 skvm::Builder b;
2476 build_program(&b);
2477 REPORTER_ASSERT(r, b.optimize().size() == 3
2478 || b.optimize().size() == 4);
2479 }
2480}
Mike Klein0a804272021-01-06 10:36:22 -06002481
2482DEF_TEST(SkVM_gather_can_hoist, r) {
2483 // A gather instruction isn't necessarily varying... it's whatever its index is.
2484 // First a typical gather scenario with varying index.
2485 {
2486 skvm::Builder b;
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002487 skvm::UPtr uniforms = b.uniform();
2488 skvm::Ptr buf = b.varying<int>();
Mike Klein0a804272021-01-06 10:36:22 -06002489 skvm::I32 ix = b.load32(buf);
2490 b.store32(buf, b.gather32(uniforms,0, ix));
2491
2492 skvm::Program p = b.done();
2493
2494 // ix is varying, so the gather is too.
2495 //
2496 // loop:
2497 // v0 = load32 buf
2498 // v1 = gather32 uniforms+0 v0
2499 // store32 buf v1
2500 REPORTER_ASSERT(r, p.instructions().size() == 3);
2501 REPORTER_ASSERT(r, p.loop() == 0);
2502 }
2503
2504 // Now the same but with a uniform index instead.
2505 {
2506 skvm::Builder b;
Herb Derbyf0efa1d2021-08-03 16:43:14 -04002507 skvm::UPtr uniforms = b.uniform();
2508 skvm::Ptr buf = b.varying<int>();
Mike Klein0a804272021-01-06 10:36:22 -06002509 skvm::I32 ix = b.uniform32(uniforms,8);
2510 b.store32(buf, b.gather32(uniforms,0, ix));
2511
2512 skvm::Program p = b.done();
2513
2514 // ix is uniform, so the gather is too.
2515 //
2516 // v0 = uniform32 uniforms+8
2517 // v1 = gather32 uniforms+0 v0
2518 // loop:
2519 // store32 buf v1
2520 REPORTER_ASSERT(r, p.instructions().size() == 3);
2521 REPORTER_ASSERT(r, p.loop() == 2);
2522 }
2523}
Mike Klein279ca2e2021-01-06 10:57:19 -06002524
2525DEF_TEST(SkVM_dont_dedup_loads, r) {
2526 // We've been assuming that all Ops with the same arguments produce the same value
2527 // and deduplicating them, which results in a simple common subexpression eliminator.
2528 //
2529 // But we can't soundly dedup two identical loads with a store between.
2530 // If we dedup the loads in this test program it will always increment by 1, not K.
2531 constexpr int K = 2;
2532 skvm::Builder b;
2533 {
Mike Klein00e43df2021-01-08 13:45:42 -06002534 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002535 for (int i = 0; i < K; i++) {
2536 b.store32(buf, b.load32(buf) + 1);
2537 }
2538 }
2539
Mike Kleinfc017c72021-02-08 10:45:19 -06002540 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002541 int buf[] = { 0,1,2,3,4 };
2542 program.eval(SK_ARRAY_COUNT(buf), buf);
2543 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
2544 REPORTER_ASSERT(r, buf[i] == i+K);
2545 }
2546 });
2547}
2548
2549DEF_TEST(SkVM_dont_dedup_stores, r) {
2550 // Following a similar line of reasoning to SkVM_dont_dedup_loads,
2551 // we cannot dedup stores either. A different store between two identical stores
2552 // will invalidate the first store, meaning we do need to reissue that store operation.
2553 skvm::Builder b;
2554 {
Mike Klein00e43df2021-01-08 13:45:42 -06002555 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002556 b.store32(buf, b.splat(4));
2557 b.store32(buf, b.splat(5));
2558 b.store32(buf, b.splat(4)); // If we dedup'd, we'd skip this store.
2559 }
2560
Mike Kleinfc017c72021-02-08 10:45:19 -06002561 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002562 int buf[42];
2563 program.eval(SK_ARRAY_COUNT(buf), buf);
2564 for (int x : buf) {
2565 REPORTER_ASSERT(r, x == 4);
2566 }
2567 });
2568}
Mike Kleinff4decc2021-02-10 16:13:35 -06002569
2570DEF_TEST(SkVM_fast_mul, r) {
2571 skvm::Builder b;
2572 {
2573 skvm::Ptr src = b.varying<float>(),
2574 fast = b.varying<float>(),
2575 slow = b.varying<float>();
2576 skvm::F32 x = b.loadF(src);
2577 b.storeF(fast, fast_mul(0.0f, x));
2578 b.storeF(slow, 0.0f * x);
2579 }
2580 test_jit_and_interpreter(b, [&](const skvm::Program& program){
2581 const uint32_t bits[] = {
2582 0x0000'0000, 0x8000'0000, //±0
2583 0x3f80'0000, 0xbf80'0000, //±1
2584 0x7f80'0000, 0xff80'0000, //±inf
2585 0x7f80'0001, 0xff80'0001, //±NaN
2586 };
2587 float fast[8],
2588 slow[8];
2589 program.eval(8,bits,fast,slow);
2590
2591 for (int i = 0; i < 8; i++) {
2592 REPORTER_ASSERT(r, fast[i] == 0.0f);
2593
2594 if (i < 4) {
2595 REPORTER_ASSERT(r, slow[i] == 0.0f);
2596 } else {
2597 REPORTER_ASSERT(r, isnan(slow[i]));
2598 }
2599 }
2600 });
2601}