blob: 3d7ef42fec38390170824fe378e4072bfbdebe11 [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein7e650762019-07-02 15:21:11 -050014
Mike Klein9977efa2019-07-15 12:22:36 -050015template <typename Fn>
Mike Kleinfc017c72021-02-08 10:45:19 -060016static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
17 skvm::Program p = b.done();
18 test(p);
19 if (p.hasJIT()) {
20 test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
Mike Kleinb5a30762019-10-16 10:11:56 -050021 }
Mike Kleinb5a30762019-10-16 10:11:56 -050022}
23
Mike Klein7542ab52020-04-02 08:50:16 -050024DEF_TEST(SkVM_eliminate_dead_code, r) {
25 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -040026 {
Mike Klein00e43df2021-01-08 13:45:42 -060027 skvm::Ptr arg = b.varying<int>();
Mike Klein7542ab52020-04-02 08:50:16 -050028 skvm::I32 l = b.load32(arg);
29 skvm::I32 a = b.add(l, l);
30 b.add(a, b.splat(7));
31 }
Herb Derbyf20400e2020-03-18 16:11:25 -040032
Mike Klein7542ab52020-04-02 08:50:16 -050033 std::vector<skvm::Instruction> program = b.program();
34 REPORTER_ASSERT(r, program.size() == 4);
35
Mike Klein5b701e12020-04-02 10:34:24 -050036 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -050037 REPORTER_ASSERT(r, program.size() == 0);
38}
39
Mike Klein9fdadb92019-07-30 12:30:13 -050040DEF_TEST(SkVM_Pointless, r) {
41 // Let's build a program with no memory arguments.
42 // It should all be pegged as dead code, but we should be able to "run" it.
43 skvm::Builder b;
44 {
45 b.add(b.splat(5.0f),
46 b.splat(4.0f));
47 }
48
Mike Kleinfc017c72021-02-08 10:45:19 -060049 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -050050 for (int N = 0; N < 64; N++) {
51 program.eval(N);
52 }
53 });
54
Mike Kleined9b1f12020-02-06 13:02:32 -060055 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -050056 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -050057 }
58}
59
Mike Klein10fc1e62020-04-13 11:57:05 -050060DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -060061 skvm::Builder b;
62 b.store32(b.varying<int>(), b.splat(42));
63
Mike Kleinfc017c72021-02-08 10:45:19 -060064 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050065 int buf[18];
66 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -060067
Mike Klein10fc1e62020-04-13 11:57:05 -050068 p.eval(17, buf);
69 for (int i = 0; i < 17; i++) {
70 REPORTER_ASSERT(r, buf[i] == 42);
71 }
72 REPORTER_ASSERT(r, buf[17] == 47);
73 });
Mike Kleinb6149312020-02-26 13:04:23 -060074}
Mike Klein11efa182020-02-27 12:04:37 -060075
Mike Klein10fc1e62020-04-13 11:57:05 -050076DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -060077 skvm::Builder b;
78 {
79 auto src = b.varying<int>(),
80 dst = b.varying<int>();
81 b.store32(dst, b.load32(src));
82 }
83
Mike Kleinfc017c72021-02-08 10:45:19 -060084 test_jit_and_interpreter(b, [&](const skvm::Program& p) {
Mike Klein10fc1e62020-04-13 11:57:05 -050085 int src[] = {1,2,3,4,5,6,7,8,9},
86 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -060087
Mike Klein10fc1e62020-04-13 11:57:05 -050088 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
89 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
90 REPORTER_ASSERT(r, dst[i] == src[i]);
91 }
92 size_t i = SK_ARRAY_COUNT(src)-1;
93 REPORTER_ASSERT(r, dst[i] == 0);
94 });
Mike Klein11efa182020-02-27 12:04:37 -060095}
Mike Kleinb6149312020-02-26 13:04:23 -060096
Mike Kleinc7c1f9c2021-02-08 10:24:52 -060097DEF_TEST(SkVM_allow_jit, r) {
98 skvm::Builder b;
99 {
100 auto src = b.varying<int>(),
101 dst = b.varying<int>();
102 b.store32(dst, b.load32(src));
103 }
104
105 if (b.done("", /*allow_jit=*/true).hasJIT()) {
106 REPORTER_ASSERT(r, !b.done("", false).hasJIT());
107 }
108}
109
Mike Klein81756e42019-06-12 11:36:28 -0500110DEF_TEST(SkVM_LoopCounts, r) {
111 // Make sure we cover all the exact N we want.
112
Mike Klein9977efa2019-07-15 12:22:36 -0500113 // buf[i] += 1
114 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -0600115 skvm::Ptr arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500116 b.store32(arg,
117 b.add(b.splat(1),
118 b.load32(arg)));
119
Mike Kleinfc017c72021-02-08 10:45:19 -0600120 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500121 int buf[64];
122 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500123 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
124 buf[i] = i;
125 }
126 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 for (int i = 0; i < N; i++) {
129 REPORTER_ASSERT(r, buf[i] == i+1);
130 }
131 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
132 REPORTER_ASSERT(r, buf[i] == i);
133 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500134 }
135 });
Mike Klein81756e42019-06-12 11:36:28 -0500136}
Mike Klein05642042019-06-18 12:16:06 -0500137
Mike Kleinb2b6a992020-01-13 16:34:30 -0600138DEF_TEST(SkVM_gather32, r) {
139 skvm::Builder b;
140 {
Mike Klein00e43df2021-01-08 13:45:42 -0600141 skvm::Ptr uniforms = b.uniform(),
Mike Kleinb2b6a992020-01-13 16:34:30 -0600142 buf = b.varying<int>();
143 skvm::I32 x = b.load32(buf);
144 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
145 }
146
Mike Kleinfc017c72021-02-08 10:45:19 -0600147 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600148 const int img[] = {12,34,56,78, 90,98,76,54};
149
150 int buf[20];
151 for (int i = 0; i < 20; i++) {
152 buf[i] = i;
153 }
154
155 struct Uniforms {
156 const int* img;
157 } uniforms{img};
158
159 program.eval(20, &uniforms, buf);
160 int i = 0;
161 REPORTER_ASSERT(r, buf[i] == 12); i++;
162 REPORTER_ASSERT(r, buf[i] == 34); i++;
163 REPORTER_ASSERT(r, buf[i] == 56); i++;
164 REPORTER_ASSERT(r, buf[i] == 78); i++;
165 REPORTER_ASSERT(r, buf[i] == 90); i++;
166 REPORTER_ASSERT(r, buf[i] == 98); i++;
167 REPORTER_ASSERT(r, buf[i] == 76); i++;
168 REPORTER_ASSERT(r, buf[i] == 54); i++;
169
170 REPORTER_ASSERT(r, buf[i] == 12); i++;
171 REPORTER_ASSERT(r, buf[i] == 34); i++;
172 REPORTER_ASSERT(r, buf[i] == 56); i++;
173 REPORTER_ASSERT(r, buf[i] == 78); i++;
174 REPORTER_ASSERT(r, buf[i] == 90); i++;
175 REPORTER_ASSERT(r, buf[i] == 98); i++;
176 REPORTER_ASSERT(r, buf[i] == 76); i++;
177 REPORTER_ASSERT(r, buf[i] == 54); i++;
178
179 REPORTER_ASSERT(r, buf[i] == 12); i++;
180 REPORTER_ASSERT(r, buf[i] == 34); i++;
181 REPORTER_ASSERT(r, buf[i] == 56); i++;
182 REPORTER_ASSERT(r, buf[i] == 78); i++;
183 });
184}
185
Mike Klein81d52672019-07-30 11:11:09 -0500186DEF_TEST(SkVM_gathers, r) {
187 skvm::Builder b;
188 {
Mike Klein00e43df2021-01-08 13:45:42 -0600189 skvm::Ptr uniforms = b.uniform(),
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600190 buf32 = b.varying<int>(),
191 buf16 = b.varying<uint16_t>(),
192 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500193
194 skvm::I32 x = b.load32(buf32);
195
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600196 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
197 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
198 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500199 }
200
Mike Kleinfc017c72021-02-08 10:45:19 -0600201 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500202 const int img[] = {12,34,56,78, 90,98,76,54};
203
204 constexpr int N = 20;
205 int buf32[N];
206 uint16_t buf16[N];
207 uint8_t buf8 [N];
208
209 for (int i = 0; i < 20; i++) {
210 buf32[i] = i;
211 }
212
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600213 struct Uniforms {
214 const int* img;
215 } uniforms{img};
216
217 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500218 int i = 0;
219 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
220 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
221 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
222 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
223 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
224 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
225 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
226 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
227
228 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
229 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
230 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
231 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
232 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
233 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
234 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
235 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
236
237 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
238 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
239 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
240 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
241 });
242}
243
Mike Klein21e85eb2020-04-17 13:57:13 -0500244DEF_TEST(SkVM_gathers2, r) {
245 skvm::Builder b;
246 {
Mike Klein00e43df2021-01-08 13:45:42 -0600247 skvm::Ptr uniforms = b.uniform(),
Mike Klein21e85eb2020-04-17 13:57:13 -0500248 buf32 = b.varying<int>(),
249 buf16 = b.varying<uint16_t>(),
250 buf8 = b.varying<uint8_t>();
251
252 skvm::I32 x = b.load32(buf32);
253
254 b.store32(buf32, b.gather32(uniforms,0, x));
255 b.store16(buf16, b.gather16(uniforms,0, x));
256 b.store8 (buf8 , b.gather8 (uniforms,0, x));
257 }
258
Mike Kleinfc017c72021-02-08 10:45:19 -0600259 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein21e85eb2020-04-17 13:57:13 -0500260 uint8_t img[256];
261 for (int i = 0; i < 256; i++) {
262 img[i] = i;
263 }
264
265 int buf32[64];
266 uint16_t buf16[64];
267 uint8_t buf8 [64];
268
269 for (int i = 0; i < 64; i++) {
270 buf32[i] = (i*47)&63;
271 buf16[i] = 0;
272 buf8 [i] = 0;
273 }
274
275 struct Uniforms {
276 const uint8_t* img;
277 } uniforms{img};
278
279 program.eval(64, &uniforms, buf32, buf16, buf8);
280
281 for (int i = 0; i < 64; i++) {
282 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
283 }
284
285 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
286 REPORTER_ASSERT(r, buf16[63] == 0x2322);
287
288 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
289 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
290 });
291}
292
Mike Klein81d52672019-07-30 11:11:09 -0500293DEF_TEST(SkVM_bitops, r) {
294 skvm::Builder b;
295 {
Mike Klein00e43df2021-01-08 13:45:42 -0600296 skvm::Ptr ptr = b.varying<int>();
Mike Klein81d52672019-07-30 11:11:09 -0500297
298 skvm::I32 x = b.load32(ptr);
299
Mike Klein4067a942020-04-05 10:25:32 -0500300 x = b.bit_and (x, b.splat(0xf1)); // 0x40
301 x = b.bit_or (x, b.splat(0x80)); // 0xc0
302 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
303 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500304
305 x = b.shl(x, 28); // 0xe000'0000
306 x = b.sra(x, 28); // 0xffff'fffe
307 x = b.shr(x, 1); // 0x7fff'ffff
308
309 b.store32(ptr, x);
310 }
311
Mike Kleinfc017c72021-02-08 10:45:19 -0600312 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500313 int x = 0x42;
314 program.eval(1, &x);
315 REPORTER_ASSERT(r, x == 0x7fff'ffff);
316 });
317}
318
Mike Klein4067a942020-04-05 10:25:32 -0500319DEF_TEST(SkVM_select_is_NaN, r) {
320 skvm::Builder b;
321 {
Mike Klein00e43df2021-01-08 13:45:42 -0600322 skvm::Ptr src = b.varying<float>(),
Mike Klein4067a942020-04-05 10:25:32 -0500323 dst = b.varying<float>();
324
325 skvm::F32 x = b.loadF(src);
326 x = select(is_NaN(x), b.splat(0.0f)
327 , x);
328 b.storeF(dst, x);
329 }
330
331 std::vector<skvm::OptimizedInstruction> program = b.optimize();
332 REPORTER_ASSERT(r, program.size() == 4);
333 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
334 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
335 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
336 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
337
Mike Kleinfc017c72021-02-08 10:45:19 -0600338 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500339 // ±NaN, ±0, ±1, ±inf
340 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
341 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
342 uint32_t dst[SK_ARRAY_COUNT(src)];
343 program.eval(SK_ARRAY_COUNT(src), src, dst);
344
345 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
346 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
347 }
348 });
349}
350
Mike Klein81d52672019-07-30 11:11:09 -0500351DEF_TEST(SkVM_f32, r) {
352 skvm::Builder b;
353 {
Mike Klein00e43df2021-01-08 13:45:42 -0600354 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500355
Mike Reedf5ff4c22020-03-23 14:57:53 -0400356 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500357 y = b.add(x,x), // y = 2x
358 z = b.sub(y,x), // z = 2x-x = x
359 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400360 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500361 }
362
Mike Kleinfc017c72021-02-08 10:45:19 -0600363 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500364 float buf[] = { 1,2,3,4,5,6,7,8,9 };
365 program.eval(SK_ARRAY_COUNT(buf), buf);
366 for (float v : buf) {
367 REPORTER_ASSERT(r, v == 1.0f);
368 }
369 });
370}
371
372DEF_TEST(SkVM_cmp_i32, r) {
373 skvm::Builder b;
374 {
375 skvm::I32 x = b.load32(b.varying<int>());
376
377 auto to_bit = [&](int shift, skvm::I32 mask) {
378 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
379 };
380
381 skvm::I32 m = b.splat(0);
382 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
383 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
384 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
385 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
386 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
387 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
388
389 b.store32(b.varying<int>(), m);
390 }
Mike Kleinfc017c72021-02-08 10:45:19 -0600391 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500392 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
393 int out[SK_ARRAY_COUNT(in)];
394
395 program.eval(SK_ARRAY_COUNT(in), in, out);
396
397 REPORTER_ASSERT(r, out[0] == 0b001111);
398 REPORTER_ASSERT(r, out[1] == 0b001100);
399 REPORTER_ASSERT(r, out[2] == 0b001010);
400 REPORTER_ASSERT(r, out[3] == 0b001010);
401 REPORTER_ASSERT(r, out[4] == 0b000010);
402 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
403 REPORTER_ASSERT(r, out[i] == 0b110010);
404 }
405 });
406}
407
408DEF_TEST(SkVM_cmp_f32, r) {
409 skvm::Builder b;
410 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400411 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500412
413 auto to_bit = [&](int shift, skvm::I32 mask) {
414 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
415 };
416
417 skvm::I32 m = b.splat(0);
418 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
419 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
420 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
421 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
422 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
423 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
424
425 b.store32(b.varying<int>(), m);
426 }
427
Mike Kleinfc017c72021-02-08 10:45:19 -0600428 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500429 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
430 int out[SK_ARRAY_COUNT(in)];
431
432 program.eval(SK_ARRAY_COUNT(in), in, out);
433
434 REPORTER_ASSERT(r, out[0] == 0b001111);
435 REPORTER_ASSERT(r, out[1] == 0b001100);
436 REPORTER_ASSERT(r, out[2] == 0b001010);
437 REPORTER_ASSERT(r, out[3] == 0b001010);
438 REPORTER_ASSERT(r, out[4] == 0b000010);
439 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
440 REPORTER_ASSERT(r, out[i] == 0b110010);
441 }
442 });
443}
444
Mike Klein14548b92020-02-28 14:02:29 -0600445DEF_TEST(SkVM_index, r) {
446 skvm::Builder b;
447 b.store32(b.varying<int>(), b.index());
448
Mike Kleinfc017c72021-02-08 10:45:19 -0600449 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600450 int buf[23];
451 program.eval(SK_ARRAY_COUNT(buf), buf);
452 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
453 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
454 }
455 });
456}
457
Mike Klein4a131192019-07-19 13:56:41 -0500458DEF_TEST(SkVM_mad, r) {
459 // This program is designed to exercise the tricky corners of instruction
460 // and register selection for Op::mad_f32.
461
462 skvm::Builder b;
463 {
Mike Klein00e43df2021-01-08 13:45:42 -0600464 skvm::Ptr arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500465
Mike Kleincac130f2020-09-25 14:47:44 -0500466 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein4a131192019-07-19 13:56:41 -0500467 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
468 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
469 w = b.mad(z,z,y), // w can alias z but not y.
470 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600471 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500472 }
473
Mike Kleinfc017c72021-02-08 10:45:19 -0600474 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500475 int x = 2;
476 program.eval(1, &x);
477 // x = 2
478 // y = 2*2 + 2 = 6
479 // z = 6*6 + 2 = 38
480 // w = 38*38 + 6 = 1450
481 // v = 1450*6 + 1450 = 10150
482 REPORTER_ASSERT(r, x == 10150);
483 });
484}
485
Mike Klein7c0332c2020-03-05 14:18:04 -0600486DEF_TEST(SkVM_fms, r) {
487 // Create a pattern that can be peepholed into an Op::fms_f32.
488 skvm::Builder b;
489 {
Mike Klein00e43df2021-01-08 13:45:42 -0600490 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600491
Mike Kleincac130f2020-09-25 14:47:44 -0500492 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600493 v = b.sub(b.mul(x, b.splat(2.0f)),
494 b.splat(1.0f));
495 b.store32(arg, b.trunc(v));
496 }
497
Mike Kleinfc017c72021-02-08 10:45:19 -0600498 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600499 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
500 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
501
502 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
503 REPORTER_ASSERT(r, buf[i] = 2*i-1);
504 }
505 });
506}
507
508DEF_TEST(SkVM_fnma, r) {
509 // Create a pattern that can be peepholed into an Op::fnma_f32.
510 skvm::Builder b;
511 {
Mike Klein00e43df2021-01-08 13:45:42 -0600512 skvm::Ptr arg = b.varying<int>();
Mike Klein7c0332c2020-03-05 14:18:04 -0600513
Mike Kleincac130f2020-09-25 14:47:44 -0500514 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600515 v = b.sub(b.splat(1.0f),
516 b.mul(x, b.splat(2.0f)));
517 b.store32(arg, b.trunc(v));
518 }
519
Mike Kleinfc017c72021-02-08 10:45:19 -0600520 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600521 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
522 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
523
524 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
525 REPORTER_ASSERT(r, buf[i] = 1-2*i);
526 }
527 });
528}
529
Mike Klein81d52672019-07-30 11:11:09 -0500530DEF_TEST(SkVM_madder, r) {
531 skvm::Builder b;
532 {
Mike Klein00e43df2021-01-08 13:45:42 -0600533 skvm::Ptr arg = b.varying<float>();
Mike Klein81d52672019-07-30 11:11:09 -0500534
Mike Reedf5ff4c22020-03-23 14:57:53 -0400535 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500536 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
537 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
538 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400539 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500540 }
541
Mike Kleinfc017c72021-02-08 10:45:19 -0600542 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500543 float x = 2.0f;
544 // y = 2*2 + 2 = 6
545 // z = 6*2 + 6 = 18
546 // w = 6*6 + 18 = 54
547 program.eval(1, &x);
548 REPORTER_ASSERT(r, x == 54.0f);
549 });
550}
551
Mike Kleinf22faaf2020-01-09 07:27:39 -0600552DEF_TEST(SkVM_floor, r) {
553 skvm::Builder b;
554 {
Mike Klein00e43df2021-01-08 13:45:42 -0600555 skvm::Ptr arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400556 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600557 }
558
Mike Kleinfc017c72021-02-08 10:45:19 -0600559 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600560 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
561 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
562 program.eval(SK_ARRAY_COUNT(buf), buf);
563 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
564 REPORTER_ASSERT(r, buf[i] == want[i]);
565 }
566 });
567}
568
Mike Klein5caf7de2020-03-12 11:05:46 -0500569DEF_TEST(SkVM_round, r) {
570 skvm::Builder b;
571 {
Mike Klein00e43df2021-01-08 13:45:42 -0600572 skvm::Ptr src = b.varying<float>();
573 skvm::Ptr dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400574 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500575 }
576
577 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
578 // We haven't explicitly guaranteed that here... it just probably is.
Mike Kleinfc017c72021-02-08 10:45:19 -0600579 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500580 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
581 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
582 int dst[SK_ARRAY_COUNT(buf)];
583
584 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
585 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
586 REPORTER_ASSERT(r, dst[i] == want[i]);
587 }
588 });
589}
590
Herb Derbyc02a41f2020-02-28 14:25:45 -0600591DEF_TEST(SkVM_min, r) {
592 skvm::Builder b;
593 {
Mike Klein00e43df2021-01-08 13:45:42 -0600594 skvm::Ptr src1 = b.varying<float>();
595 skvm::Ptr src2 = b.varying<float>();
596 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600597
Mike Reedf5ff4c22020-03-23 14:57:53 -0400598 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600599 }
600
Mike Kleinfc017c72021-02-08 10:45:19 -0600601 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600602 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
603 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
604 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
605 float d[SK_ARRAY_COUNT(s1)];
606 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
607 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
608 REPORTER_ASSERT(r, d[i] == want[i]);
609 }
610 });
611}
612
613DEF_TEST(SkVM_max, r) {
614 skvm::Builder b;
615 {
Mike Klein00e43df2021-01-08 13:45:42 -0600616 skvm::Ptr src1 = b.varying<float>();
617 skvm::Ptr src2 = b.varying<float>();
618 skvm::Ptr dst = b.varying<float>();
Herb Derbyc02a41f2020-02-28 14:25:45 -0600619
Mike Reedf5ff4c22020-03-23 14:57:53 -0400620 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600621 }
622
Mike Kleinfc017c72021-02-08 10:45:19 -0600623 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600624 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
625 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
626 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
627 float d[SK_ARRAY_COUNT(s1)];
628 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
629 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
630 REPORTER_ASSERT(r, d[i] == want[i]);
631 }
632 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600633}
634
Mike Kleinf98d0d32019-07-22 14:30:18 -0500635DEF_TEST(SkVM_hoist, r) {
636 // This program uses enough constants that it will fail to JIT if we hoist them.
637 // The JIT will try again without hoisting, and that'll just need 2 registers.
638 skvm::Builder b;
639 {
Mike Klein00e43df2021-01-08 13:45:42 -0600640 skvm::Ptr arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500641 skvm::I32 x = b.load32(arg);
642 for (int i = 0; i < 32; i++) {
643 x = b.add(x, b.splat(i));
644 }
645 b.store32(arg, x);
646 }
647
Mike Kleinfc017c72021-02-08 10:45:19 -0600648 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500649 int x = 4;
650 program.eval(1, &x);
651 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
652 // x += 496
653 REPORTER_ASSERT(r, x == 500);
654 });
655}
656
Mike Kleinb9944122019-08-02 12:22:39 -0500657DEF_TEST(SkVM_select, r) {
658 skvm::Builder b;
659 {
Mike Klein00e43df2021-01-08 13:45:42 -0600660 skvm::Ptr buf = b.varying<int>();
Mike Kleinb9944122019-08-02 12:22:39 -0500661
662 skvm::I32 x = b.load32(buf);
663
664 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
665
666 b.store32(buf, x);
667 }
668
Mike Kleinfc017c72021-02-08 10:45:19 -0600669 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500670 int buf[] = { 0,1,2,3,4,5,6,7,8 };
671 program.eval(SK_ARRAY_COUNT(buf), buf);
672 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
673 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
674 }
675 });
676}
677
Mike Kleinf471c822021-01-05 13:31:15 -0600678DEF_TEST(SkVM_swap, r) {
679 skvm::Builder b;
680 {
681 // This program is the equivalent of
682 // x = *X
683 // y = *Y
684 // *X = y
685 // *Y = x
686 // One rescheduling of the program based only on data flow of Op arguments is
687 // x = *X
688 // *Y = x
689 // y = *Y
690 // *X = y
691 // but this reordering does not produce the same results and is invalid.
Mike Klein00e43df2021-01-08 13:45:42 -0600692 skvm::Ptr X = b.varying<int>(),
Mike Kleinf471c822021-01-05 13:31:15 -0600693 Y = b.varying<int>();
694
695 skvm::I32 x = b.load32(X),
696 y = b.load32(Y);
697
698 b.store32(X, y);
699 b.store32(Y, x);
700 }
701
Mike Kleinfc017c72021-02-08 10:45:19 -0600702 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Kleinf471c822021-01-05 13:31:15 -0600703 int b1[] = { 0,1,2,3 };
704 int b2[] = { 4,5,6,7 };
705 program.eval(SK_ARRAY_COUNT(b1), b1, b2);
706 for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
707 REPORTER_ASSERT(r, b1[i] == 4 + i);
708 REPORTER_ASSERT(r, b2[i] == i);
709 }
710 });
711}
712
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500713DEF_TEST(SkVM_NewOps, r) {
714 // Exercise a somewhat arbitrary set of new ops.
715 skvm::Builder b;
716 {
Mike Klein00e43df2021-01-08 13:45:42 -0600717 skvm::Ptr buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500718 uniforms = b.uniform();
719
720 skvm::I32 x = b.load16(buf);
721
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600722 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500723
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600724 x = b.add(x, b.uniform32(uniforms, kPtr+0));
Mike Klein8b16bee2020-11-25 10:54:02 -0600725 x = b.mul(x, b.uniform32(uniforms, kPtr+4));
726 x = b.sub(x, b.uniform32(uniforms, kPtr+8));
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600727
Mike Klein8b16bee2020-11-25 10:54:02 -0600728 skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500729 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
730 x = b.select(b.gt(x, limit ), limit , x);
731
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600732 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500733
734 b.store16(buf, x);
735 }
736
Mike Kleinfc017c72021-02-08 10:45:19 -0600737 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500738 const int N = 31;
739 int16_t buf[N];
740 for (int i = 0; i < N; i++) {
741 buf[i] = i;
742 }
743
744 const int M = 16;
745 uint8_t img[M];
746 for (int i = 0; i < M; i++) {
747 img[i] = i*i;
748 }
749
750 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600751 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500752 int add = 5;
Mike Klein8b16bee2020-11-25 10:54:02 -0600753 int mul = 3;
754 int sub = 18;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500755 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600756 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500757
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600758 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500759
760 for (int i = 0; i < N; i++) {
761 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
762 int x = 3*(i-1);
763
764 // Then that's pinned to the limits of img.
765 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
766 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
767 REPORTER_ASSERT(r, buf[i] == img[x]);
768 }
769 });
770}
771
Mike Klein5a8404c2020-02-28 14:24:56 -0600772DEF_TEST(SkVM_sqrt, r) {
773 skvm::Builder b;
774 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400775 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600776
Mike Kleinfc017c72021-02-08 10:45:19 -0600777 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600778 constexpr int K = 17;
779 float buf[K];
780 for (int i = 0; i < K; i++) {
781 buf[i] = (float)(i*i);
782 }
783
784 // x^2 -> x
785 program.eval(K, buf);
786
787 for (int i = 0; i < K; i++) {
788 REPORTER_ASSERT(r, buf[i] == (float)i);
789 }
790 });
791}
792
Mike Klein3f7c8652019-11-07 10:33:56 -0600793DEF_TEST(SkVM_MSAN, r) {
794 // This little memset32() program should be able to JIT, but if we run that
795 // JIT code in an MSAN build, it won't see the writes initialize buf. So
796 // this tests that we're using the interpreter instead.
797 skvm::Builder b;
798 b.store32(b.varying<int>(), b.splat(42));
799
Mike Kleinfc017c72021-02-08 10:45:19 -0600800 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600801 constexpr int K = 17;
802 int buf[K]; // Intentionally uninitialized.
803 program.eval(K, buf);
804 sk_msan_assert_initialized(buf, buf+K);
805 for (int x : buf) {
806 REPORTER_ASSERT(r, x == 42);
807 }
808 });
809}
810
Mike Klein13601172019-11-08 15:01:02 -0600811DEF_TEST(SkVM_assert, r) {
812 skvm::Builder b;
813 b.assert_true(b.lt(b.load32(b.varying<int>()),
814 b.splat(42)));
815
Mike Kleinfc017c72021-02-08 10:45:19 -0600816 test_jit_and_interpreter(b, [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600817 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600818 program.eval(SK_ARRAY_COUNT(buf), buf);
819 });
820}
821
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600822DEF_TEST(SkVM_premul, reporter) {
823 // Test that premul is short-circuited when alpha is known opaque.
824 {
825 skvm::Builder p;
826 auto rptr = p.varying<int>(),
827 aptr = p.varying<int>();
828
Mike Reedf5ff4c22020-03-23 14:57:53 -0400829 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600830 g = p.splat(0.0f),
831 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400832 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600833
834 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400835 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600836
837 // load red, load alpha, red *= alpha, store red
838 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
839 }
840
841 {
842 skvm::Builder p;
843 auto rptr = p.varying<int>();
844
Mike Reedf5ff4c22020-03-23 14:57:53 -0400845 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600846 g = p.splat(0.0f),
847 b = p.splat(0.0f),
848 a = p.splat(1.0f);
849
850 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400851 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600852
853 // load red, store red
854 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
855 }
856
857 // Same deal for unpremul.
858 {
859 skvm::Builder p;
860 auto rptr = p.varying<int>(),
861 aptr = p.varying<int>();
862
Mike Reedf5ff4c22020-03-23 14:57:53 -0400863 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600864 g = p.splat(0.0f),
865 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400866 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600867
868 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400869 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600870
871 // load red, load alpha, a bunch of unpremul instructions, store red
872 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
873 }
874
875 {
876 skvm::Builder p;
877 auto rptr = p.varying<int>();
878
Mike Reedf5ff4c22020-03-23 14:57:53 -0400879 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600880 g = p.splat(0.0f),
881 b = p.splat(0.0f),
882 a = p.splat(1.0f);
883
884 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400885 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600886
887 // load red, store red
888 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
889 }
890}
Mike Klein05642042019-06-18 12:16:06 -0500891
Mike Klein05642042019-06-18 12:16:06 -0500892template <typename Fn>
893static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400894 uint8_t buf[4096];
895 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500896 fn(a);
897
898 REPORTER_ASSERT(r, a.size() == expected.size());
899
Mike Klein88c0a902019-06-24 15:34:02 -0400900 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500901 want = expected.begin();
902 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500903 REPORTER_ASSERT(r, got[i] == want[i],
904 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500905 }
906}
907
908DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500909 // Easiest way to generate test cases is
910 //
911 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
912 //
913 // The -x86-asm-syntax=intel bit is optional, controlling the
914 // input syntax only; the output will always be AT&T op x,y,dst style.
915 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
916 // that a bit easier to use here, despite maybe favoring AT&T overall.
917
918 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500919 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500920 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -0600921 a.int3();
Mike Klein05642042019-06-18 12:16:06 -0500922 a.vzeroupper();
923 a.ret();
924 },{
Mike Kleinee5864a2019-11-11 09:16:44 -0600925 0xcc,
Mike Klein05642042019-06-18 12:16:06 -0500926 0xc5, 0xf8, 0x77,
927 0xc3,
928 });
929
Mike Klein237dbb42019-07-19 09:44:47 -0500930 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -0500931 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500932 a.ret();
933 a.align(4);
934 },{
935 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -0500936 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -0500937 });
Mike Klein61703a62019-06-18 15:01:12 -0500938
Mike Klein397fc882019-06-20 11:37:10 -0500939 test_asm(r, [&](A& a) {
940 a.add(A::rax, 8); // Always good to test rax.
941 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -0500942
Mike Klein397fc882019-06-20 11:37:10 -0500943 a.add(A::rdi, 12); // Last 0x48 REX
944 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -0500945
Mike Klein86a645c2019-07-12 12:29:39 -0500946 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -0500947 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -0500948
Mike Klein397fc882019-06-20 11:37:10 -0500949 a.add(A::rsi, 128); // Requires 4 byte immediate.
950 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -0500951
952 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
953 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
954 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
Mike Klein68d075e2020-07-28 09:26:51 -0500955 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
Mike Kleinc15c9362020-04-16 11:10:36 -0500956 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
Mike Klein68d075e2020-07-28 09:26:51 -0500957 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
958 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
Mike Kleinc15c9362020-04-16 11:10:36 -0500959 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
960 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
961 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
962
963 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
964
965 a.add( A::rax , A::rcx); // addq %rcx, %rax
966 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
967 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
968 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
969
970 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -0500971 },{
Mike Kleind3e75a72019-06-18 15:26:08 -0500972 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -0500973 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -0500974
975 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -0500976 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -0500977
Mike Klein86a645c2019-07-12 12:29:39 -0500978 0x49, 0x83, 0b11'000'000, 0x07,
979 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -0500980
981 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -0500982 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -0500983
984 0x48,0x83,0x06,0x07,
985 0x48,0x83,0x46,0x0c,0x07,
986 0x48,0x83,0x44,0x24,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -0500987 0x49,0x83,0x44,0x24,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -0500988 0x48,0x83,0x44,0x84,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -0500989 0x49,0x83,0x44,0x84,0x0c,0x07,
990 0x4a,0x83,0x44,0xa0,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -0500991 0x4b,0x83,0x44,0x43,0x0c,0x07,
992 0x49,0x83,0x44,0x03,0x0c,0x07,
993 0x4a,0x83,0x44,0x18,0x0c,0x07,
994
995 0x4a,0x83,0x6c,0x18,0x0c,0x07,
996
997 0x48,0x01,0xc8,
998 0x48,0x01,0x08,
999 0x48,0x01,0x48,0x0c,
1000 0x48,0x03,0x48,0x0c,
1001 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001002 });
Mike Klein397fc882019-06-20 11:37:10 -05001003
1004
1005 test_asm(r, [&](A& a) {
1006 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1007 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1008 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1009 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1010 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1011 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1012 },{
1013 /* VEX */ /*op*/ /*modRM*/
1014 0xc5, 0xf5, 0xfe, 0xc2,
1015 0xc5, 0x75, 0xfe, 0xc2,
1016 0xc5, 0xbd, 0xfe, 0xc2,
1017 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1018 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1019 0xc5, 0xf5, 0xfa, 0xc2,
1020 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001021
1022 test_asm(r, [&](A& a) {
Mike Klein84dd8f92020-09-15 07:57:27 -05001023 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1024 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1025 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1026 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1027
1028 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1029 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1030 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1031 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1032
1033 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1034 a.vpabsw (A::ymm4, A::ymm3);
1035 a.vpsllw (A::ymm4, A::ymm3, 12);
1036 a.vpsraw (A::ymm4, A::ymm3, 12);
1037 },{
1038 0xc5, 0xe5, 0xfd, 0xe2,
1039 0xc5, 0xe5, 0xe3, 0xe2,
1040 0xc5, 0xe5, 0x75, 0xe2,
1041 0xc5, 0xe5, 0x65, 0xe2,
1042
1043 0xc5, 0xe5, 0xea, 0xe2,
1044 0xc5, 0xe5, 0xee, 0xe2,
1045 0xc4,0xe2,0x65, 0x3a, 0xe2,
1046 0xc4,0xe2,0x65, 0x3e, 0xe2,
1047
1048 0xc4,0xe2,0x65, 0x0b, 0xe2,
1049 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1050 0xc5,0xdd,0x71, 0xf3, 0x0c,
1051 0xc5,0xdd,0x71, 0xe3, 0x0c,
1052 });
1053
1054 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001055 A::Label l;
1056 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001057 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1058 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1059 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1060 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1061 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1062 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001063 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001064 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001065 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001066 0xc5,0xf5,0x76,0xc2,
1067 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001068 0xc5,0xf4,0xc2,0xc2,0x00,
1069 0xc5,0xf4,0xc2,0xc2,0x01,
1070 0xc5,0xf4,0xc2,0xc2,0x02,
1071 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001072 });
1073
1074 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001075 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1076 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1077 },{
1078 0xc5,0xf4,0x5d,0xc2,
1079 0xc5,0xf4,0x5f,0xc2,
1080 });
1081
1082 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001083 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1084 },{
1085 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1086 });
1087
1088 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001089 a.vpsrld(A::ymm15, A::ymm2, 8);
1090 a.vpsrld(A::ymm0 , A::ymm8, 5);
1091 },{
1092 0xc5, 0x85, 0x72,0xd2, 0x08,
1093 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1094 });
1095
1096 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001097 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001098 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001099 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001100 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001101 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001102 },{
Mike Klein184f6012020-07-22 13:17:29 -05001103 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001104 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001105 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1106 });
Mike Kleine5053412019-06-21 12:37:22 -05001107
1108 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001109 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1110 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1111 },{
1112 0xc5,0xed,0x62,0x0f,
1113 0xc5,0xed,0x6a,0xcb,
1114 });
1115
1116 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001117 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1118 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1119 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1120 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1121 },{
1122 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1123 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1124 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1125 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1126 });
1127
1128 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001129 A::Label l;
1130 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001131 a.byte(1);
1132 a.byte(2);
1133 a.byte(3);
1134 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001135
Mike Klein65c10b52019-07-12 09:22:21 -05001136 a.vbroadcastss(A::ymm0 , &l);
1137 a.vbroadcastss(A::ymm1 , &l);
1138 a.vbroadcastss(A::ymm8 , &l);
1139 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001140
Mike Klein65c10b52019-07-12 09:22:21 -05001141 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001142 a.vpaddd (A::ymm4, A::ymm3, &l);
1143 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001144
1145 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001146
1147 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001148 },{
1149 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001150
Mike Kleine5053412019-06-21 12:37:22 -05001151 /* VEX */ /*op*/ /* ModRM */ /* offset */
1152 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1153 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1154 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1155 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001156
1157 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001158
1159 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1160 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001161
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001162 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1163
        0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001165 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001166
1167 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001168 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1169 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1170 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1171 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001172
1173 a.vbroadcastss(A::ymm8, A::xmm0);
1174 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001175 },{
1176 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1177 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1178 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1179 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1180 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001181
1182 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1183 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001184 });
1185
1186 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001187 A::Label l;
1188 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001189 a.jne(&l);
1190 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001191 a.je (&l);
1192 a.jmp(&l);
1193 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001194 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001195
Mike Kleinc15c9362020-04-16 11:10:36 -05001196 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001197 a.cmp(A::rax, 12);
1198 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001199 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001200 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1201 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1202 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1203 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1204 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001205 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001206
Mike Kleinc15c9362020-04-16 11:10:36 -05001207 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001208 0x48,0x83,0xf8,0x0c,
1209 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001210 });
Mike Klein120d9e82019-06-21 15:52:55 -05001211
1212 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001213 a.vmovups(A::ymm5, A::Mem{A::rsi});
1214 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001215
Mike Klein400ba222020-06-30 15:54:19 -05001216 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001217 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001218
Mike Kleinedc2dac2020-04-15 16:18:27 -05001219 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1220 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001221
Mike Klein8390f2e2020-04-15 17:03:08 -05001222 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001223 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001224 /* VEX */ /*Op*/ /* ModRM */
1225 0xc5, 0xfc, 0x10, 0b00'101'110,
1226 0xc5, 0xfc, 0x11, 0b00'101'110,
1227
Mike Klein400ba222020-06-30 15:54:19 -05001228 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001229 0xc5, 0xf8, 0x11, 0b00'101'110,
1230
Mike Klein52010b72019-08-02 11:18:00 -05001231 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001232 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001233
1234 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001235 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001236
1237 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001238 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1239 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1240 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001241
Mike Kleinedc2dac2020-04-15 16:18:27 -05001242 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1243 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1244 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001245 },{
1246 0xc5,0xfc,0x10,0x2c,0x24,
1247 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1248 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1249
1250 0xc5,0xfc,0x11,0x2c,0x24,
1251 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1252 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1253 });
1254
1255 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001256 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1257 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1258 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1259 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1260 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001261
Mike Kleinc15c9362020-04-16 11:10:36 -05001262 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1263 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1264 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1265 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1266 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001267
Mike Klein8390f2e2020-04-15 17:03:08 -05001268 a.vmovd(A::Mem{A::rax}, A::xmm0);
1269 a.vmovd(A::Mem{A::rax}, A::xmm8);
1270 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1271
1272 a.vmovd(A::xmm0, A::Mem{A::rax});
1273 a.vmovd(A::xmm8, A::Mem{A::rax});
1274 a.vmovd(A::xmm0, A::Mem{A::r8 });
1275
1276 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1277 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1278 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1279
Mike Klein35b97c32019-07-12 12:32:45 -05001280 a.vmovd(A::rax, A::xmm0);
1281 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001282 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001283
1284 a.vmovd(A::xmm0, A::rax);
1285 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001286 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001287
Mike Kleinc15c9362020-04-16 11:10:36 -05001288 a.movb(A::Mem{A::rdx}, A::rax);
1289 a.movb(A::Mem{A::rdx}, A::r8 );
1290 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001291
Mike Kleinc15c9362020-04-16 11:10:36 -05001292 a.movb(A::rdx, A::Mem{A::rax});
1293 a.movb(A::rdx, A::Mem{A::r8 });
1294 a.movb(A::r8 , A::Mem{A::rax});
1295
1296 a.movb(A::rdx, 12);
1297 a.movb(A::rax, 4);
1298 a.movb(A::r8 , -1);
1299
1300 a.movb(A::Mem{A::rdx}, 12);
1301 a.movb(A::Mem{A::rax}, 4);
1302 a.movb(A::Mem{A::r8 }, -1);
1303 },{
1304 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1305 0x49,0x0f,0xb6,0x00,
1306 0x4c,0x0f,0xb6,0x06,
1307 0x4c,0x0f,0xb6,0x46, 12,
1308 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1309
1310 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1311 0x49,0x0f,0xb7,0x00,
1312 0x4c,0x0f,0xb7,0x06,
1313 0x4c,0x0f,0xb7,0x46, 12,
1314 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001315
Mike Klein35b97c32019-07-12 12:32:45 -05001316 0xc5,0xf9,0x7e,0x00,
1317 0xc5,0x79,0x7e,0x00,
1318 0xc4,0xc1,0x79,0x7e,0x00,
1319
1320 0xc5,0xf9,0x6e,0x00,
1321 0xc5,0x79,0x6e,0x00,
1322 0xc4,0xc1,0x79,0x6e,0x00,
1323
Mike Klein93d3fab2020-01-14 10:46:44 -06001324 0xc5,0xf9,0x6e,0x04,0x88,
1325 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1326 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1327
Mike Klein35b97c32019-07-12 12:32:45 -05001328 0xc5,0xf9,0x7e,0xc0,
1329 0xc5,0x79,0x7e,0xc0,
1330 0xc4,0xc1,0x79,0x7e,0xc0,
1331
1332 0xc5,0xf9,0x6e,0xc0,
1333 0xc5,0x79,0x6e,0xc0,
1334 0xc4,0xc1,0x79,0x6e,0xc0,
1335
Mike Kleinc15c9362020-04-16 11:10:36 -05001336 0x48 ,0x88, 0x02,
1337 0x4c, 0x88, 0x02,
1338 0x49, 0x88, 0x00,
1339
1340 0x48 ,0x8a, 0x10,
1341 0x49, 0x8a, 0x10,
1342 0x4c, 0x8a, 0x00,
1343
1344 0x48, 0xc6, 0xc2, 0x0c,
1345 0x48, 0xc6, 0xc0, 0x04,
1346 0x49, 0xc6, 0xc0, 0xff,
1347
1348 0x48, 0xc6, 0x02, 0x0c,
1349 0x48, 0xc6, 0x00, 0x04,
1350 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001351 });
1352
1353 test_asm(r, [&](A& a) {
Mike Klein4ecc9702020-07-30 10:03:10 -05001354 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1355 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8;
1356
Mike Klein8390f2e2020-04-15 17:03:08 -05001357 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
        a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001359
Mike Klein8390f2e2020-04-15 17:03:08 -05001360 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
Mike Klein4ecc9702020-07-30 10:03:10 -05001361 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001362
Mike Klein21e85eb2020-04-17 13:57:13 -05001363 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1364 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1365
1366 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1367 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1368
Mike Klein8390f2e2020-04-15 17:03:08 -05001369 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1370 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001371
Mike Klein8390f2e2020-04-15 17:03:08 -05001372 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1373 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001374 },{
Mike Klein4ecc9702020-07-30 10:03:10 -05001375 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1376 0xc4,0x43,0x71, 0x22, 0x00, 3,
1377
Mike Klein52010b72019-08-02 11:18:00 -05001378 0xc5,0xb9, 0xc4, 0x0e, 4,
1379 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1380
Mike Klein35b97c32019-07-12 12:32:45 -05001381 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1382 0xc4,0x43,0x71, 0x20, 0x00, 12,
1383
Mike Klein21e85eb2020-04-17 13:57:13 -05001384 0xc4,0x63,0x7d,0x39,0xc1, 1,
1385 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1386
1387 0xc4,0x63,0x79,0x16,0x06, 3,
1388 0xc4,0xc3,0x79,0x16,0x08, 2,
1389
Mike Klein95529e82019-08-02 11:43:43 -05001390 0xc4,0x63,0x79, 0x15, 0x06, 7,
1391 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1392
Mike Klein35b97c32019-07-12 12:32:45 -05001393 0xc4,0x63,0x79, 0x14, 0x06, 7,
1394 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1395 });
1396
1397 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001398 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1399 },{
1400 0xc5, 0x9d, 0xdf, 0xda,
1401 });
Mike Klein9f4df802019-06-24 18:47:16 -04001402
Mike Kleind4546d62019-07-30 12:15:40 -05001403 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001404 A::Label l;
1405 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1406
1407 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1408 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1409 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1410
1411 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1412 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1413
1414 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1415 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1416 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1417 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1418 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1419
1420 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1421 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1422 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1423
Mike Kleind4546d62019-07-30 12:15:40 -05001424 a.vcvttps2dq(A::ymm3, A::ymm2);
1425 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001426 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001427 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001428 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001429 },{
1430 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001431
1432 0xc5,0xfd,0x6f,0x1e,
1433 0xc5,0xfd,0x6f,0x1c,0x24,
1434 0xc4,0xc1,0x7d,0x6f,0x1b,
1435
1436 0xc5,0xfd,0x6f,0x5e,0x04,
1437 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1438
1439 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1440 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1441 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1442 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1443 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1444
1445 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1446 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1447
1448 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1449
Mike Kleind4546d62019-07-30 12:15:40 -05001450 0xc5,0xfe,0x5b,0xda,
1451 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001452 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001453 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001454 });
1455
Mike Kleinbeaa1082020-01-13 14:04:18 -06001456 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001457 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1458 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1459
1460 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1461 a.vcvtph2ps(A::ymm2, A::xmm3);
1462 },{
1463 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1464 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1465
1466 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1467 0xc4,0xe2,0x7d,0x13,0xd3,
1468 });
1469
1470 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001471 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1472 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1473 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1474 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1475 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1476 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1477 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1478 },{
1479 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1480 0xc4,0xe2,0x75,0x92,0x04,0x10,
1481 0xc4,0x62,0x75,0x92,0x14,0x10,
1482 0xc4,0xa2,0x75,0x92,0x04,0x20,
1483 0xc4,0xc2,0x75,0x92,0x04,0x11,
1484 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1485 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1486 });
1487
Mike Kleinc322f632020-01-13 16:18:58 -06001488 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001489 a.mov(A::rax, A::Mem{A::rdi, 0});
1490 a.mov(A::rax, A::Mem{A::rdi, 1});
1491 a.mov(A::rax, A::Mem{A::rdi, 512});
1492 a.mov(A::r15, A::Mem{A::r13, 42});
1493 a.mov(A::rax, A::Mem{A::r13, 42});
1494 a.mov(A::r15, A::Mem{A::rax, 42});
1495 a.mov(A::rax, 1);
1496 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001497 },{
1498 0x48, 0x8b, 0x07,
1499 0x48, 0x8b, 0x47, 0x01,
1500 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1501 0x4d, 0x8b, 0x7d, 0x2a,
1502 0x49, 0x8b, 0x45, 0x2a,
1503 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001504 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1505 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001506 });
1507
Mike Klein9f4df802019-06-24 18:47:16 -04001508 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1509
1510 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001511 a.and16b(A::v4, A::v3, A::v1);
1512 a.orr16b(A::v4, A::v3, A::v1);
1513 a.eor16b(A::v4, A::v3, A::v1);
1514 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001515 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001516 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001517
1518 a.add4s(A::v4, A::v3, A::v1);
1519 a.sub4s(A::v4, A::v3, A::v1);
1520 a.mul4s(A::v4, A::v3, A::v1);
1521
Mike Klein97afd2e2019-10-16 14:11:27 -05001522 a.cmeq4s(A::v4, A::v3, A::v1);
1523 a.cmgt4s(A::v4, A::v3, A::v1);
1524
Mike Klein65809142019-06-25 09:44:02 -04001525 a.sub8h(A::v4, A::v3, A::v1);
1526 a.mul8h(A::v4, A::v3, A::v1);
1527
Mike Klein9f4df802019-06-24 18:47:16 -04001528 a.fadd4s(A::v4, A::v3, A::v1);
1529 a.fsub4s(A::v4, A::v3, A::v1);
1530 a.fmul4s(A::v4, A::v3, A::v1);
1531 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001532 a.fmin4s(A::v4, A::v3, A::v1);
1533 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein8d78da92020-11-25 13:53:20 -06001534
1535 a.fneg4s (A::v4, A::v3);
1536 a.fsqrt4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001537
Mike Klein65809142019-06-25 09:44:02 -04001538 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001539 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001540
1541 a.fcmeq4s(A::v4, A::v3, A::v1);
1542 a.fcmgt4s(A::v4, A::v3, A::v1);
1543 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001544 },{
Mike Klein65809142019-06-25 09:44:02 -04001545 0x64,0x1c,0x21,0x4e,
1546 0x64,0x1c,0xa1,0x4e,
1547 0x64,0x1c,0x21,0x6e,
1548 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001549 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001550 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001551
1552 0x64,0x84,0xa1,0x4e,
1553 0x64,0x84,0xa1,0x6e,
1554 0x64,0x9c,0xa1,0x4e,
1555
Mike Klein97afd2e2019-10-16 14:11:27 -05001556 0x64,0x8c,0xa1,0x6e,
1557 0x64,0x34,0xa1,0x4e,
1558
Mike Klein65809142019-06-25 09:44:02 -04001559 0x64,0x84,0x61,0x6e,
1560 0x64,0x9c,0x61,0x4e,
1561
Mike Klein9f4df802019-06-24 18:47:16 -04001562 0x64,0xd4,0x21,0x4e,
1563 0x64,0xd4,0xa1,0x4e,
1564 0x64,0xdc,0x21,0x6e,
1565 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001566 0x64,0xf4,0xa1,0x4e,
1567 0x64,0xf4,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001568
Mike Klein7c0332c2020-03-05 14:18:04 -06001569 0x64,0xf8,0xa0,0x6e,
Mike Klein8d78da92020-11-25 13:53:20 -06001570 0x64,0xf8,0xa1,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001571
Mike Klein65809142019-06-25 09:44:02 -04001572 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001573 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001574
1575 0x64,0xe4,0x21,0x4e,
1576 0x64,0xe4,0xa1,0x6e,
1577 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001578 });
1579
1580 test_asm(r, [&](A& a) {
1581 a.shl4s(A::v4, A::v3, 0);
1582 a.shl4s(A::v4, A::v3, 1);
1583 a.shl4s(A::v4, A::v3, 8);
1584 a.shl4s(A::v4, A::v3, 16);
1585 a.shl4s(A::v4, A::v3, 31);
1586
1587 a.sshr4s(A::v4, A::v3, 1);
1588 a.sshr4s(A::v4, A::v3, 8);
1589 a.sshr4s(A::v4, A::v3, 31);
1590
1591 a.ushr4s(A::v4, A::v3, 1);
1592 a.ushr4s(A::v4, A::v3, 8);
1593 a.ushr4s(A::v4, A::v3, 31);
1594
1595 a.ushr8h(A::v4, A::v3, 1);
1596 a.ushr8h(A::v4, A::v3, 8);
1597 a.ushr8h(A::v4, A::v3, 15);
1598 },{
1599 0x64,0x54,0x20,0x4f,
1600 0x64,0x54,0x21,0x4f,
1601 0x64,0x54,0x28,0x4f,
1602 0x64,0x54,0x30,0x4f,
1603 0x64,0x54,0x3f,0x4f,
1604
1605 0x64,0x04,0x3f,0x4f,
1606 0x64,0x04,0x38,0x4f,
1607 0x64,0x04,0x21,0x4f,
1608
1609 0x64,0x04,0x3f,0x6f,
1610 0x64,0x04,0x38,0x6f,
1611 0x64,0x04,0x21,0x6f,
1612
1613 0x64,0x04,0x1f,0x6f,
1614 0x64,0x04,0x18,0x6f,
1615 0x64,0x04,0x11,0x6f,
1616 });
1617
1618 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001619 a.sli4s(A::v4, A::v3, 0);
1620 a.sli4s(A::v4, A::v3, 1);
1621 a.sli4s(A::v4, A::v3, 8);
1622 a.sli4s(A::v4, A::v3, 16);
1623 a.sli4s(A::v4, A::v3, 31);
1624 },{
1625 0x64,0x54,0x20,0x6f,
1626 0x64,0x54,0x21,0x6f,
1627 0x64,0x54,0x28,0x6f,
1628 0x64,0x54,0x30,0x6f,
1629 0x64,0x54,0x3f,0x6f,
1630 });
1631
1632 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001633 a.scvtf4s (A::v4, A::v3);
1634 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001635 a.fcvtns4s(A::v4, A::v3);
Mike Klein8d78da92020-11-25 13:53:20 -06001636 a.frintp4s(A::v4, A::v3);
1637 a.frintm4s(A::v4, A::v3);
Mike Kleinec255632020-12-03 10:25:31 -06001638 a.fcvtn (A::v4, A::v3);
1639 a.fcvtl (A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001640 },{
1641 0x64,0xd8,0x21,0x4e,
1642 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001643 0x64,0xa8,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001644 0x64,0x88,0xa1,0x4e,
1645 0x64,0x98,0x21,0x4e,
Mike Kleinec255632020-12-03 10:25:31 -06001646 0x64,0x68,0x21,0x0e,
1647 0x64,0x78,0x21,0x0e,
Mike Klein9f4df802019-06-24 18:47:16 -04001648 });
Mike Klein15a368d2019-06-26 10:21:12 -04001649
1650 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001651 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1652 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1653 a.strq(A::v1, A::sp); // str q1, [sp]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001654 a.strd(A::v0, A::sp, 6); // str s0, [sp, #48]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001655 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001656 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001657 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1658 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001659 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001660 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001661 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001662 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1663 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001664 },{
1665 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001666 0xe0,0x07,0x80,0x3d,
1667 0xe1,0x03,0x80,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001668 0xe0,0x1b,0x00,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001669 0xe0,0x1b,0x00,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001670 0xe0,0x2b,0x00,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001671 0xe0,0xbf,0x00,0x3d,
1672 0xe9,0xab,0x40,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001673 0xe9,0xbf,0x40,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001674 0xe7,0x2b,0x40,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001675 0xe7,0x07,0x40,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001676 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001677 0xff,0x83,0x00,0x91,
1678 });
1679
1680 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001681 a.brk(0);
1682 a.brk(65535);
1683
Mike Klein15a368d2019-06-26 10:21:12 -04001684 a.ret(A::x30); // Conventional ret using link register.
1685 a.ret(A::x13); // Can really return using any register if we like.
1686
1687 a.add(A::x2, A::x2, 4);
1688 a.add(A::x3, A::x2, 32);
1689
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001690 a.sub(A::x2, A::x2, 4);
1691 a.sub(A::x3, A::x2, 32);
1692
Mike Klein15a368d2019-06-26 10:21:12 -04001693 a.subs(A::x2, A::x2, 4);
1694 a.subs(A::x3, A::x2, 32);
1695
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001696 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1697 a.cmp(A::x2, 4);
1698
Mike Kleinc74db792020-05-11 11:57:12 -05001699 A::Label l;
1700 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001701 a.bne(&l);
1702 a.bne(&l);
1703 a.blt(&l);
1704 a.b(&l);
1705 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001706 a.cbz(A::x2, &l);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001707
1708 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1709 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
Mike Klein15a368d2019-06-26 10:21:12 -04001710 },{
Mike Klein37be7712019-11-13 13:19:01 -06001711 0x00,0x00,0x20,0xd4,
1712 0xe0,0xff,0x3f,0xd4,
1713
Mike Klein15a368d2019-06-26 10:21:12 -04001714 0xc0,0x03,0x5f,0xd6,
1715 0xa0,0x01,0x5f,0xd6,
1716
1717 0x42,0x10,0x00,0x91,
1718 0x43,0x80,0x00,0x91,
1719
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001720 0x42,0x10,0x00,0xd1,
1721 0x43,0x80,0x00,0xd1,
1722
Mike Klein15a368d2019-06-26 10:21:12 -04001723 0x42,0x10,0x00,0xf1,
1724 0x43,0x80,0x00,0xf1,
1725
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001726 0x5f,0x10,0x00,0xf1,
1727 0x5f,0x10,0x00,0xf1,
1728
1729 0x01,0x00,0x00,0x54, // b.ne #0
1730 0xe1,0xff,0xff,0x54, // b.ne #-4
1731 0xcb,0xff,0xff,0x54, // b.lt #-8
1732 0xae,0xff,0xff,0x54, // b.al #-12
1733 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1734 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Kleindbc19ea2020-11-18 13:32:14 -06001735
1736 0x43,0x00,0x01,0x8b,
1737 0x43,0x0c,0x81,0x8b,
Mike Klein15a368d2019-06-26 10:21:12 -04001738 });
Mike Kleine51632e2019-06-26 14:47:43 -04001739
Mike Kleince7b88c2019-07-11 14:06:40 -05001740 // Can we cbz() to a not-yet-defined label?
1741 test_asm(r, [&](A& a) {
1742 A::Label l;
1743 a.cbz(A::x2, &l);
1744 a.add(A::x3, A::x2, 32);
1745 a.label(&l);
1746 a.ret(A::x30);
1747 },{
1748 0x42,0x00,0x00,0xb4, // cbz x2, #8
1749 0x43,0x80,0x00,0x91, // add x3, x2, #32
1750 0xc0,0x03,0x5f,0xd6, // ret
1751 });
1752
1753 // If we start a label as a backward label,
1754 // can we redefine it to be a future label?
1755 // (Not sure this is useful... just want to test it works.)
1756 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001757 A::Label l1;
1758 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001759 a.add(A::x3, A::x2, 32);
1760 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1761
Mike Kleinc74db792020-05-11 11:57:12 -05001762 A::Label l2; // Start off the same...
1763 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001764 a.add(A::x3, A::x2, 32);
1765 a.cbz(A::x2, &l2); // Looks like this will go backward...
1766 a.add(A::x2, A::x2, 4);
1767 a.add(A::x3, A::x2, 32);
1768 a.label(&l2); // But no... actually forward! What a switcheroo!
1769 },{
1770 0x43,0x80,0x00,0x91, // add x3, x2, #32
1771 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1772
1773 0x43,0x80,0x00,0x91, // add x3, x2, #32
1774 0x62,0x00,0x00,0xb4, // cbz x2, #12
1775 0x42,0x10,0x00,0x91, // add x2, x2, #4
1776 0x43,0x80,0x00,0x91, // add x3, x2, #32
1777 });
1778
Mike Klein81d52672019-07-30 11:11:09 -05001779 // Loading from a label on ARM.
1780 test_asm(r, [&](A& a) {
1781 A::Label fore,aft;
1782 a.label(&fore);
1783 a.word(0x01234567);
1784 a.ldrq(A::v1, &fore);
1785 a.ldrq(A::v2, &aft);
1786 a.label(&aft);
1787 a.word(0x76543210);
1788 },{
1789 0x67,0x45,0x23,0x01,
1790 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1791 0x22,0x00,0x00,0x9c, // ldr q2, #4
1792 0x10,0x32,0x54,0x76,
1793 });
1794
Mike Kleine51632e2019-06-26 14:47:43 -04001795 test_asm(r, [&](A& a) {
1796 a.ldrq(A::v0, A::x8);
1797 a.strq(A::v0, A::x8);
1798 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001799 0x00,0x01,0xc0,0x3d,
1800 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001801 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001802
1803 test_asm(r, [&](A& a) {
Mike Klein8d78da92020-11-25 13:53:20 -06001804 a.dup4s (A::v0, A::x8);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001805 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
1806 a.ld1r8h (A::v0, A::x8);
1807 a.ld1r16b(A::v0, A::x8);
1808 },{
Mike Klein8d78da92020-11-25 13:53:20 -06001809 0x00,0x0d,0x04,0x4e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001810 0x00,0xc9,0x40,0x4d,
1811 0x00,0xc5,0x40,0x4d,
1812 0x00,0xc1,0x40,0x4d,
1813 });
1814
1815 test_asm(r, [&](A& a) {
Mike Kleindd069a92021-01-20 13:51:33 -06001816 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
1817 a.ld44s(A::v0, A::x8);
1818 a.st24s(A::v0, A::x8);
1819 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
Mike Kleinf988bb52021-01-27 12:53:34 -06001820
1821 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
1822 a.ld24s(A::v0, A::x8, 1);
1823 a.ld24s(A::v0, A::x8, 2);
1824 a.ld24s(A::v0, A::x8, 3);
1825
1826 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
1827 a.ld44s(A::v0, A::x8, 1);
1828 a.ld44s(A::v0, A::x8, 2);
1829 a.ld44s(A::v0, A::x8, 3);
Mike Kleindd069a92021-01-20 13:51:33 -06001830 },{
1831 0x00,0x89,0x40,0x4c,
1832 0x00,0x09,0x40,0x4c,
1833 0x00,0x89,0x00,0x4c,
1834 0x00,0x09,0x00,0x4c,
Mike Kleinf988bb52021-01-27 12:53:34 -06001835
1836 0x00,0x81,0x60,0x0d,
1837 0x00,0x91,0x60,0x0d,
1838 0x00,0x81,0x60,0x4d,
1839 0x00,0x91,0x60,0x4d,
1840
1841 0x00,0xa1,0x60,0x0d,
1842 0x00,0xb1,0x60,0x0d,
1843 0x00,0xa1,0x60,0x4d,
1844 0x00,0xb1,0x60,0x4d,
Mike Kleindd069a92021-01-20 13:51:33 -06001845 });
1846
1847 test_asm(r, [&](A& a) {
Mike Klein1fa149a2019-07-01 11:18:08 -05001848 a.xtns2h(A::v0, A::v0);
1849 a.xtnh2b(A::v0, A::v0);
1850 a.strs (A::v0, A::x0);
1851
1852 a.ldrs (A::v0, A::x0);
1853 a.uxtlb2h(A::v0, A::v0);
1854 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001855
1856 a.uminv4s(A::v3, A::v4);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001857 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
1858 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
1859 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
Mike Klein1fa149a2019-07-01 11:18:08 -05001860 },{
1861 0x00,0x28,0x61,0x0e,
1862 0x00,0x28,0x21,0x0e,
1863 0x00,0x00,0x00,0xbd,
1864
1865 0x00,0x00,0x40,0xbd,
1866 0x00,0xa4,0x08,0x2f,
1867 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001868
1869 0x83,0xa8,0xb1,0x6e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001870 0x83,0x3c,0x04,0x0e,
1871 0x83,0x3c,0x0c,0x0e,
1872 0x64,0x1c,0x1c,0x4e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001873 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001874
1875 test_asm(r, [&](A& a) {
1876 a.ldrb(A::v0, A::x8);
1877 a.strb(A::v0, A::x8);
1878 },{
1879 0x00,0x01,0x40,0x3d,
1880 0x00,0x01,0x00,0x3d,
1881 });
Mike Klein81d52672019-07-30 11:11:09 -05001882
1883 test_asm(r, [&](A& a) {
Mike Kleindbc19ea2020-11-18 13:32:14 -06001884 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
1885 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
1886 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
1887 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
Mike Kleina7470df2020-12-03 12:06:27 -06001888
1889 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001890 },{
1891 0x20,0x0c,0x40,0xf9,
1892 0x20,0x0c,0x40,0xb9,
1893 0x20,0x0c,0x40,0x79,
1894 0x20,0x0c,0x40,0x39,
Mike Kleina7470df2020-12-03 12:06:27 -06001895
1896 0x20,0x0c,0x00,0xb9,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001897 });
1898
1899 test_asm(r, [&](A& a) {
Mike Kleinf5097db2020-12-03 09:21:00 -06001900 a.tbl (A::v0, A::v1, A::v2);
Mike Kleinc7bca522020-12-03 10:01:29 -06001901 a.uzp14s(A::v0, A::v1, A::v2);
1902 a.uzp24s(A::v0, A::v1, A::v2);
Mike Kleinf5097db2020-12-03 09:21:00 -06001903 a.zip14s(A::v0, A::v1, A::v2);
1904 a.zip24s(A::v0, A::v1, A::v2);
Mike Klein81d52672019-07-30 11:11:09 -05001905 },{
1906 0x20,0x00,0x02,0x4e,
Mike Kleinc7bca522020-12-03 10:01:29 -06001907 0x20,0x18,0x82,0x4e,
1908 0x20,0x58,0x82,0x4e,
Mike Kleinf5097db2020-12-03 09:21:00 -06001909 0x20,0x38,0x82,0x4e,
1910 0x20,0x78,0x82,0x4e,
Mike Klein81d52672019-07-30 11:11:09 -05001911 });
Mike Klein05642042019-06-18 12:16:06 -05001912}
Mike Reedbcb46c02020-03-23 17:51:01 -04001913
1914DEF_TEST(SkVM_approx_math, r) {
1915 auto eval = [](int N, float values[], auto fn) {
1916 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06001917 skvm::Ptr inout = b.varying<float>();
Mike Reedbcb46c02020-03-23 17:51:01 -04001918
1919 b.storeF(inout, fn(&b, b.loadF(inout)));
1920
1921 b.done().eval(N, values);
1922 };
1923
1924 auto compare = [r](int N, const float values[], const float expected[]) {
1925 for (int i = 0; i < N; ++i) {
1926 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1927 }
1928 };
1929
1930 // log2
1931 {
1932 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1933 constexpr int N = SK_ARRAY_COUNT(values);
1934 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1935 return b->approx_log2(v);
1936 });
1937 const float expected[] = {-2, -1, 0, 1, 2, 3};
1938 compare(N, values, expected);
1939 }
1940
1941 // pow2
1942 {
1943 float values[] = {-2, -1, 0, 1, 2, 3};
1944 constexpr int N = SK_ARRAY_COUNT(values);
1945 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1946 return b->approx_pow2(v);
1947 });
1948 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
1949 compare(N, values, expected);
1950 }
1951
1952 // powf -- x^0.5
1953 {
1954 float bases[] = {0, 1, 4, 9, 16};
1955 constexpr int N = SK_ARRAY_COUNT(bases);
1956 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
1957 return b->approx_powf(base, b->splat(0.5f));
1958 });
1959 const float expected[] = {0, 1, 2, 3, 4};
1960 compare(N, bases, expected);
1961 }
1962 // powf -- 3^x
1963 {
1964 float exps[] = {-2, -1, 0, 1, 2};
1965 constexpr int N = SK_ARRAY_COUNT(exps);
1966 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
1967 return b->approx_powf(b->splat(3.0f), exp);
1968 });
1969 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
1970 compare(N, exps, expected);
1971 }
Mike Reed82ff25e2020-04-07 13:51:41 -04001972
Mike Reedd468a162020-04-11 14:14:00 -04001973 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04001974 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06001975 skvm::Ptr inout = b.varying<float>();
Mike Reed82ff25e2020-04-07 13:51:41 -04001976 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04001977 float actual = arg;
1978 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04001979
Mike Reedd468a162020-04-11 14:14:00 -04001980 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04001981
1982 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04001983 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04001984 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04001985 }
Mike Reed1b84ef22020-04-13 17:56:24 -04001986 return err;
1987 };
1988
1989 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
1990 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06001991 skvm::Ptr in0 = b.varying<float>();
1992 skvm::Ptr in1 = b.varying<float>();
1993 skvm::Ptr out = b.varying<float>();
Mike Reed1b84ef22020-04-13 17:56:24 -04001994 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
1995 float actual;
1996 b.done().eval(1, &arg0, &arg1, &actual);
1997
1998 float err = std::abs(actual - expected);
1999
2000 if (err > tolerance) {
2001 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2002 REPORTER_ASSERT(r, true);
2003 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002004 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002005 };
2006
Mike Reed801ba0d2020-04-10 12:37:36 -04002007 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002008 {
2009 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002010 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002011 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2012 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2013 return approx_sin(x);
2014 });
2015 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2016 return approx_cos(x);
2017 });
2018 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002019
2020 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2021 // so bring in the domain a little.
2022 constexpr float eps = 0.16f;
2023 float err = 0;
2024 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2025 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2026 return approx_tan(x);
2027 });
2028 // try again with some multiples of P, to check our periodicity
2029 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2030 return approx_tan(x + 3*P);
2031 });
2032 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2033 return approx_tan(x - 3*P);
2034 });
2035 }
Mike Reedd468a162020-04-11 14:14:00 -04002036 if (0) { SkDebugf("tan error %g\n", err); }
2037 }
2038
2039 // asin, acos, atan
2040 {
2041 constexpr float tol = 0.00175f;
2042 float err = 0;
2043 for (float x = -1; x <= 1; x += 1.0f/64) {
2044 err += test(x, asin(x), tol, [](skvm::F32 x) {
2045 return approx_asin(x);
2046 });
2047 test(x, acos(x), tol, [](skvm::F32 x) {
2048 return approx_acos(x);
2049 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002050 }
Mike Reedd468a162020-04-11 14:14:00 -04002051 if (0) { SkDebugf("asin error %g\n", err); }
2052
2053 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002054 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002055 err += test(x, atan(x), tol, [](skvm::F32 x) {
2056 return approx_atan(x);
2057 });
2058 }
2059 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002060
2061 for (float y = -3; y <= 3; y += 1) {
2062 for (float x = -3; x <= 3; x += 1) {
2063 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002064 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002065 });
2066 }
2067 }
2068 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002069 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002070}
Mike Klein210288f2020-04-08 11:31:07 -05002071
2072DEF_TEST(SkVM_min_max, r) {
2073 // min() and max() have subtle behavior when one argument is NaN and
2074 // the other isn't. It's not sound to blindly swap their arguments.
2075 //
2076 // All backends must behave like std::min() and std::max(), which are
2077 //
2078 // min(x,y) = y<x ? y : x
2079 // max(x,y) = x<y ? y : x
2080
2081 // ±NaN, ±0, ±1, ±inf
2082 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2083 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2084
2085 float f[8];
2086 memcpy(f, bits, sizeof(bits));
2087
2088 auto identical = [&](float x, float y) {
2089 uint32_t X,Y;
2090 memcpy(&X, &x, 4);
2091 memcpy(&Y, &y, 4);
2092 return X == Y;
2093 };
2094
2095 // Test min/max with non-constant x, non-constant y.
2096 // (Whether x and y are varying or uniform shouldn't make any difference.)
2097 {
2098 skvm::Builder b;
2099 {
Mike Klein00e43df2021-01-08 13:45:42 -06002100 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002101 mn = b.varying<float>(),
2102 mx = b.varying<float>();
2103
2104 skvm::F32 x = b.loadF(src),
2105 y = b.uniformF(b.uniform(), 0);
2106
2107 b.storeF(mn, b.min(x,y));
2108 b.storeF(mx, b.max(x,y));
2109 }
2110
Mike Kleinfc017c72021-02-08 10:45:19 -06002111 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002112 float mn[8], mx[8];
2113 for (int i = 0; i < 8; i++) {
2114 // min() and max() everything with f[i].
2115 program.eval(8, f,mn,mx, &f[i]);
2116
2117 for (int j = 0; j < 8; j++) {
2118 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2119 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2120 }
2121 }
2122 });
2123 }
2124
2125 // Test each with constant on the right.
2126 for (int i = 0; i < 8; i++) {
2127 skvm::Builder b;
2128 {
Mike Klein00e43df2021-01-08 13:45:42 -06002129 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002130 mn = b.varying<float>(),
2131 mx = b.varying<float>();
2132
2133 skvm::F32 x = b.loadF(src),
2134 y = b.splat(f[i]);
2135
2136 b.storeF(mn, b.min(x,y));
2137 b.storeF(mx, b.max(x,y));
2138 }
2139
Mike Kleinfc017c72021-02-08 10:45:19 -06002140 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002141 float mn[8], mx[8];
2142 program.eval(8, f,mn,mx);
2143 for (int j = 0; j < 8; j++) {
2144 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2145 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2146 }
2147 });
2148 }
2149
2150 // Test each with constant on the left.
2151 for (int i = 0; i < 8; i++) {
2152 skvm::Builder b;
2153 {
Mike Klein00e43df2021-01-08 13:45:42 -06002154 skvm::Ptr src = b.varying<float>(),
Mike Klein210288f2020-04-08 11:31:07 -05002155 mn = b.varying<float>(),
2156 mx = b.varying<float>();
2157
2158 skvm::F32 x = b.splat(f[i]),
2159 y = b.loadF(src);
2160
2161 b.storeF(mn, b.min(x,y));
2162 b.storeF(mx, b.max(x,y));
2163 }
2164
Mike Kleinfc017c72021-02-08 10:45:19 -06002165 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002166 float mn[8], mx[8];
2167 program.eval(8, f,mn,mx);
2168 for (int j = 0; j < 8; j++) {
2169 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2170 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2171 }
2172 });
2173 }
2174}
Mike Klein4d680cd2020-07-15 09:58:51 -05002175
2176DEF_TEST(SkVM_halfs, r) {
2177 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2178 0xc400,0xb800,0xbc00,0xc000};
2179 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2180 -4.0f,-0.5f,-1.0f,-2.0f};
2181 {
2182 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002183 skvm::Ptr src = b.varying<uint16_t>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002184 dst = b.varying<float>();
Mike Klein42d67a62020-12-01 10:14:55 -06002185 b.storeF(dst, b.from_fp16(b.load16(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002186
Mike Kleinfc017c72021-02-08 10:45:19 -06002187 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002188 float dst[8];
2189 program.eval(8, hs, dst);
2190 for (int i = 0; i < 8; i++) {
2191 REPORTER_ASSERT(r, dst[i] == fs[i]);
2192 }
2193 });
2194 }
2195 {
2196 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002197 skvm::Ptr src = b.varying<float>(),
Mike Klein4d680cd2020-07-15 09:58:51 -05002198 dst = b.varying<uint16_t>();
Mike Klein42d67a62020-12-01 10:14:55 -06002199 b.store16(dst, b.to_fp16(b.loadF(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002200
Mike Kleinfc017c72021-02-08 10:45:19 -06002201 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein4d680cd2020-07-15 09:58:51 -05002202 uint16_t dst[8];
2203 program.eval(8, fs, dst);
2204 for (int i = 0; i < 8; i++) {
2205 REPORTER_ASSERT(r, dst[i] == hs[i]);
2206 }
2207 });
2208 }
2209}
Mike Klein6732da02020-07-16 13:03:18 -05002210
2211DEF_TEST(SkVM_64bit, r) {
2212 uint32_t lo[65],
2213 hi[65];
2214 uint64_t wide[65];
2215 for (int i = 0; i < 65; i++) {
2216 lo[i] = 2*i+0;
2217 hi[i] = 2*i+1;
2218 wide[i] = ((uint64_t)lo[i] << 0)
2219 | ((uint64_t)hi[i] << 32);
2220 }
2221
2222 {
2223 skvm::Builder b;
2224 {
Mike Klein00e43df2021-01-08 13:45:42 -06002225 skvm::Ptr wide = b.varying<uint64_t>(),
Mike Klein6732da02020-07-16 13:03:18 -05002226 lo = b.varying<int>(),
2227 hi = b.varying<int>();
Mike Klein31367892020-07-30 08:19:12 -05002228 b.store32(lo, b.load64(wide, 0));
2229 b.store32(hi, b.load64(wide, 1));
Mike Klein6732da02020-07-16 13:03:18 -05002230 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002231 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002232 uint32_t l[65], h[65];
2233 program.eval(65, wide,l,h);
2234 for (int i = 0; i < 65; i++) {
2235 REPORTER_ASSERT(r, l[i] == lo[i]);
2236 REPORTER_ASSERT(r, h[i] == hi[i]);
2237 }
2238 });
2239 }
2240
2241 {
2242 skvm::Builder b;
2243 {
Mike Klein00e43df2021-01-08 13:45:42 -06002244 skvm::Ptr wide = b.varying<uint64_t>(),
Mike Klein6732da02020-07-16 13:03:18 -05002245 lo = b.varying<int>(),
2246 hi = b.varying<int>();
2247 b.store64(wide, b.load32(lo), b.load32(hi));
2248 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002249 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein6732da02020-07-16 13:03:18 -05002250 uint64_t w[65];
2251 program.eval(65, w,lo,hi);
2252 for (int i = 0; i < 65; i++) {
2253 REPORTER_ASSERT(r, w[i] == wide[i]);
2254 }
2255 });
2256 }
2257}
Mike Kleine942b8c2020-07-21 10:17:14 -05002258
Mike Kleinb19518d2020-12-03 14:39:41 -06002259DEF_TEST(SkVM_128bit, r) {
2260 float floats[4*63];
2261 uint8_t packed[4*63];
2262
2263 for (int i = 0; i < 4*63; i++) {
2264 floats[i] = i * (1/255.0f);
2265 }
2266
Mike Klein447f3312021-02-08 09:46:59 -06002267 skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType),
2268 rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);
Mike Kleinb19518d2020-12-03 14:39:41 -06002269
2270 { // Convert RGBA F32 to RGBA 8888, testing 128-bit loads.
2271 skvm::Builder b;
2272 {
Mike Klein00e43df2021-01-08 13:45:42 -06002273 skvm::Ptr dst = b.arg( 4),
Mike Kleinb19518d2020-12-03 14:39:41 -06002274 src = b.arg(16);
2275
2276 skvm::Color c = b.load(rgba_ffff, src);
2277 b.store(rgba_8888, dst, c);
2278 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002279 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002280 memset(packed, 0, sizeof(packed));
2281 program.eval(63, packed, floats);
2282 for (int i = 0; i < 4*63; i++) {
2283 REPORTER_ASSERT(r, packed[i] == i);
2284 }
2285 });
2286 }
2287
2288
2289 { // Convert RGBA 8888 to RGBA F32, testing 128-bit stores.
2290 skvm::Builder b;
2291 {
Mike Klein00e43df2021-01-08 13:45:42 -06002292 skvm::Ptr dst = b.arg(16),
Mike Kleinb19518d2020-12-03 14:39:41 -06002293 src = b.arg( 4);
2294
2295 skvm::Color c = b.load(rgba_8888, src);
2296 b.store(rgba_ffff, dst, c);
2297 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002298 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleinb19518d2020-12-03 14:39:41 -06002299 memset(floats, 0, sizeof(floats));
2300 program.eval(63, floats, packed);
2301 for (int i = 0; i < 4*63; i++) {
2302 REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
2303 }
2304 });
2305 }
2306
2307}
2308
Mike Kleine942b8c2020-07-21 10:17:14 -05002309DEF_TEST(SkVM_is_NaN_is_finite, r) {
2310 skvm::Builder b;
2311 {
Mike Klein00e43df2021-01-08 13:45:42 -06002312 skvm::Ptr src = b.varying<float>(),
Mike Kleine942b8c2020-07-21 10:17:14 -05002313 nan = b.varying<int>(),
2314 fin = b.varying<int>();
2315 b.store32(nan, is_NaN (b.loadF(src)));
2316 b.store32(fin, is_finite(b.loadF(src)));
2317 }
Mike Kleinfc017c72021-02-08 10:45:19 -06002318 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Kleine942b8c2020-07-21 10:17:14 -05002319 // ±NaN, ±0, ±1, ±inf
2320 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2321 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2322 uint32_t nan[8], fin[8];
2323 program.eval(8, bits, nan,fin);
2324
2325 for (int i = 0; i < 8; i++) {
2326 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2327 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2328 i == 4 || i == 5) ? 0xffffffff : 0));
2329 }
2330 });
2331}
Mike Klein0cfd5032020-07-28 11:08:27 -05002332
2333DEF_TEST(SkVM_args, r) {
2334 // Test we can handle at least six arguments.
2335 skvm::Builder b;
2336 {
Mike Klein00e43df2021-01-08 13:45:42 -06002337 skvm::Ptr dst = b.varying<float>(),
Mike Klein0cfd5032020-07-28 11:08:27 -05002338 A = b.varying<float>(),
2339 B = b.varying<float>(),
2340 C = b.varying<float>(),
2341 D = b.varying<float>(),
2342 E = b.varying<float>();
2343 storeF(dst, b.loadF(A)
2344 + b.loadF(B)
2345 + b.loadF(C)
2346 + b.loadF(D)
2347 + b.loadF(E));
2348 }
2349
Mike Kleinfc017c72021-02-08 10:45:19 -06002350 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein0cfd5032020-07-28 11:08:27 -05002351 float dst[17],A[17],B[17],C[17],D[17],E[17];
2352 for (int i = 0; i < 17; i++) {
2353 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2354 }
2355 program.eval(17, dst,A,B,C,D,E);
2356 for (int i = 0; i < 17; i++) {
2357 REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2358 }
2359 });
2360}
Mike Klein9791e502020-09-15 12:43:38 -05002361
Mike Kleinee40ec62020-11-20 15:34:16 -06002362DEF_TEST(SkVM_badpack, r) {
2363 // Test case distilled from actual failing draw,
2364 // originally with a bad arm64 implementation of pack().
2365 skvm::Builder p;
2366 {
Mike Klein00e43df2021-01-08 13:45:42 -06002367 skvm::Ptr uniforms = p.uniform(),
Mike Kleinee40ec62020-11-20 15:34:16 -06002368 dst = p.varying<uint16_t>();
2369
Mike Klein5ec9c4e2020-12-01 10:43:46 -06002370 skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
Mike Kleinee40ec62020-11-20 15:34:16 -06002371 a = p.splat(0xf);
2372
2373 skvm::I32 _4444 = p.splat(0);
2374 _4444 = pack(_4444, r, 12);
2375 _4444 = pack(_4444, a, 0);
2376 store16(dst, _4444);
2377 }
2378
Mike Kleinfc017c72021-02-08 10:45:19 -06002379 test_jit_and_interpreter(p, [&](const skvm::Program& program){
Mike Kleinee40ec62020-11-20 15:34:16 -06002380 const float uniforms[] = { 0.0f, 0.0f,
2381 1.0f, 0.0f, 0.0f, 1.0f };
2382
2383 uint16_t dst[17] = {0};
2384 program.eval(17, uniforms,dst);
2385 for (int i = 0; i < 17; i++) {
2386 REPORTER_ASSERT(r, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
2387 }
2388 });
2389}
Mike Klein960bd2d2020-12-21 14:33:55 -06002390
2391DEF_TEST(SkVM_features, r) {
2392 auto build_program = [](skvm::Builder* b) {
2393 skvm::F32 x = b->loadF(b->varying<float>());
2394 b->storeF(b->varying<float>(), x*x+x);
2395 };
2396
2397 { // load-fma-store with FMA available.
2398 skvm::Features features;
2399 features.fma = true;
2400 skvm::Builder b(features);
2401 build_program(&b);
2402 REPORTER_ASSERT(r, b.optimize().size() == 3);
2403 }
2404
2405 { // load-mul-add-store without FMA.
2406 skvm::Features features;
2407 features.fma = false;
2408 skvm::Builder b(features);
2409 build_program(&b);
2410 REPORTER_ASSERT(r, b.optimize().size() == 4);
2411 }
2412
2413 { // Auto-detected, could be either.
2414 skvm::Builder b;
2415 build_program(&b);
2416 REPORTER_ASSERT(r, b.optimize().size() == 3
2417 || b.optimize().size() == 4);
2418 }
2419}
Mike Klein0a804272021-01-06 10:36:22 -06002420
2421DEF_TEST(SkVM_gather_can_hoist, r) {
2422 // A gather instruction isn't necessarily varying... it's whatever its index is.
2423 // First a typical gather scenario with varying index.
2424 {
2425 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002426 skvm::Ptr uniforms = b.uniform(),
Mike Klein0a804272021-01-06 10:36:22 -06002427 buf = b.varying<int>();
2428 skvm::I32 ix = b.load32(buf);
2429 b.store32(buf, b.gather32(uniforms,0, ix));
2430
2431 skvm::Program p = b.done();
2432
2433 // ix is varying, so the gather is too.
2434 //
2435 // loop:
2436 // v0 = load32 buf
2437 // v1 = gather32 uniforms+0 v0
2438 // store32 buf v1
2439 REPORTER_ASSERT(r, p.instructions().size() == 3);
2440 REPORTER_ASSERT(r, p.loop() == 0);
2441 }
2442
2443 // Now the same but with a uniform index instead.
2444 {
2445 skvm::Builder b;
Mike Klein00e43df2021-01-08 13:45:42 -06002446 skvm::Ptr uniforms = b.uniform(),
Mike Klein0a804272021-01-06 10:36:22 -06002447 buf = b.varying<int>();
2448 skvm::I32 ix = b.uniform32(uniforms,8);
2449 b.store32(buf, b.gather32(uniforms,0, ix));
2450
2451 skvm::Program p = b.done();
2452
2453 // ix is uniform, so the gather is too.
2454 //
2455 // v0 = uniform32 uniforms+8
2456 // v1 = gather32 uniforms+0 v0
2457 // loop:
2458 // store32 buf v1
2459 REPORTER_ASSERT(r, p.instructions().size() == 3);
2460 REPORTER_ASSERT(r, p.loop() == 2);
2461 }
2462}
Mike Klein279ca2e2021-01-06 10:57:19 -06002463
2464DEF_TEST(SkVM_dont_dedup_loads, r) {
2465 // We've been assuming that all Ops with the same arguments produce the same value
2466 // and deduplicating them, which results in a simple common subexpression eliminator.
2467 //
2468 // But we can't soundly dedup two identical loads with a store between.
2469 // If we dedup the loads in this test program it will always increment by 1, not K.
2470 constexpr int K = 2;
2471 skvm::Builder b;
2472 {
Mike Klein00e43df2021-01-08 13:45:42 -06002473 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002474 for (int i = 0; i < K; i++) {
2475 b.store32(buf, b.load32(buf) + 1);
2476 }
2477 }
2478
Mike Kleinfc017c72021-02-08 10:45:19 -06002479 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002480 int buf[] = { 0,1,2,3,4 };
2481 program.eval(SK_ARRAY_COUNT(buf), buf);
2482 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
2483 REPORTER_ASSERT(r, buf[i] == i+K);
2484 }
2485 });
2486}
2487
2488DEF_TEST(SkVM_dont_dedup_stores, r) {
2489 // Following a similar line of reasoning to SkVM_dont_dedup_loads,
2490 // we cannot dedup stores either. A different store between two identical stores
2491 // will invalidate the first store, meaning we do need to reissue that store operation.
2492 skvm::Builder b;
2493 {
Mike Klein00e43df2021-01-08 13:45:42 -06002494 skvm::Ptr buf = b.varying<int>();
Mike Klein279ca2e2021-01-06 10:57:19 -06002495 b.store32(buf, b.splat(4));
2496 b.store32(buf, b.splat(5));
2497 b.store32(buf, b.splat(4)); // If we dedup'd, we'd skip this store.
2498 }
2499
Mike Kleinfc017c72021-02-08 10:45:19 -06002500 test_jit_and_interpreter(b, [&](const skvm::Program& program){
Mike Klein279ca2e2021-01-06 10:57:19 -06002501 int buf[42];
2502 program.eval(SK_ARRAY_COUNT(buf), buf);
2503 for (int x : buf) {
2504 REPORTER_ASSERT(r, x == 4);
2505 }
2506 });
2507}
Mike Kleinff4decc2021-02-10 16:13:35 -06002508
2509DEF_TEST(SkVM_fast_mul, r) {
2510 skvm::Builder b;
2511 {
2512 skvm::Ptr src = b.varying<float>(),
2513 fast = b.varying<float>(),
2514 slow = b.varying<float>();
2515 skvm::F32 x = b.loadF(src);
2516 b.storeF(fast, fast_mul(0.0f, x));
2517 b.storeF(slow, 0.0f * x);
2518 }
2519 test_jit_and_interpreter(b, [&](const skvm::Program& program){
2520 const uint32_t bits[] = {
2521 0x0000'0000, 0x8000'0000, //±0
2522 0x3f80'0000, 0xbf80'0000, //±1
2523 0x7f80'0000, 0xff80'0000, //±inf
2524 0x7f80'0001, 0xff80'0001, //±NaN
2525 };
2526 float fast[8],
2527 slow[8];
2528 program.eval(8,bits,fast,slow);
2529
2530 for (int i = 0; i < 8; i++) {
2531 REPORTER_ASSERT(r, fast[i] == 0.0f);
2532
2533 if (i < 4) {
2534 REPORTER_ASSERT(r, slow[i] == 0.0f);
2535 } else {
2536 REPORTER_ASSERT(r, isnan(slow[i]));
2537 }
2538 }
2539 });
2540}