blob: b60a000079c007894459eef91d34b159ab715afa [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Klein10fc1e62020-04-13 11:57:05 -050036static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
37#if defined(SKVM_LLVM)
38 SkASSERT(program.hasJIT());
39#elif defined(SKVM_JIT) && defined(SK_CPU_X86) // soon!
40 // SkASSERT(program.hasJIT());
41#elif defined(SKVM_JIT) // eventually!
42 // SkASSERT(program.hasJIT());
43#else
44 SkASSERT(!program.hasJIT());
45#endif
Mike Klein52435502019-10-16 10:11:56 -050046
Mike Klein10fc1e62020-04-13 11:57:05 -050047 if (program.hasJIT()) {
Mike Kleinb5a30762019-10-16 10:11:56 -050048 test((const skvm::Program&) program);
49 program.dropJIT();
50 }
Mike Klein10fc1e62020-04-13 11:57:05 -050051 test((const skvm::Program&) program);
Mike Kleinb5a30762019-10-16 10:11:56 -050052}
53
54
Mike Klein68c50d02019-05-29 12:57:54 -050055DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050056 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050057
58 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050059 for (int s = 0; s < 3; s++)
60 for (int d = 0; d < 3; d++) {
61 auto srcFmt = (Fmt)s,
62 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050063 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050064
Mike Klein267f5072019-06-03 16:27:46 -050065 buf.writeText(fmt_name(srcFmt));
66 buf.writeText(" over ");
67 buf.writeText(fmt_name(dstFmt));
68 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050069 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050070 }
Mike Klein68c50d02019-05-29 12:57:54 -050071
Mike Klein7b7077c2019-06-03 17:10:59 -050072 // Write the I32 Srcovers also.
73 {
Mike Kleinaab45b52019-07-02 15:39:23 -050074 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050075 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050076 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050077 }
Mike Klein7b7077c2019-06-03 17:10:59 -050078
Mike Kleinf9963112019-08-08 15:13:25 -040079 {
Mike Kleind48488b2019-10-22 12:27:58 -050080 // Demonstrate the value of program reordering.
81 skvm::Builder b;
82 skvm::Arg sp = b.varying<int>(),
83 dp = b.varying<int>();
84
85 skvm::I32 byte = b.splat(0xff);
86
87 skvm::I32 src = b.load32(sp),
88 sr = b.extract(src, 0, byte),
89 sg = b.extract(src, 8, byte),
90 sb = b.extract(src, 16, byte),
91 sa = b.extract(src, 24, byte);
92
93 skvm::I32 dst = b.load32(dp),
94 dr = b.extract(dst, 0, byte),
95 dg = b.extract(dst, 8, byte),
96 db = b.extract(dst, 16, byte),
97 da = b.extract(dst, 24, byte);
98
99 skvm::I32 R = b.add(sr, dr),
100 G = b.add(sg, dg),
101 B = b.add(sb, db),
102 A = b.add(sa, da);
103
104 skvm::I32 rg = b.pack(R, G, 8),
105 ba = b.pack(B, A, 8),
106 rgba = b.pack(rg, ba, 16);
107
108 b.store32(dp, rgba);
109
110 dump(b, &buf);
111 }
112
Mike Klein238105b2020-03-04 17:05:32 -0600113 // Our checked in dump expectations assume we have FMA support.
Mike Klein10fc1e62020-04-13 11:57:05 -0500114 if (skvm::fma_supported()) {
Mike Klein238105b2020-03-04 17:05:32 -0600115 sk_sp<SkData> blob = buf.detachAsData();
116 {
Mike Klein267f5072019-06-03 16:27:46 -0500117
Mike Klein238105b2020-03-04 17:05:32 -0600118 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
119 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
120 if (expected) {
121 if (blob->size() != expected->size()
122 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500123
Mike Klein238105b2020-03-04 17:05:32 -0600124 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
125 expected->size(), expected->data(),
126 blob->size(), blob->data());
127 }
Mike Klein77163312019-06-04 13:35:32 -0500128
Mike Klein238105b2020-03-04 17:05:32 -0600129 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
130 if (out.isValid()) {
131 out.write(blob->data(), blob->size());
132 }
Mike Klein77163312019-06-04 13:35:32 -0500133 }
Mike Klein68c50d02019-05-29 12:57:54 -0500134 }
135 }
136
Mike Klein9977efa2019-07-15 12:22:36 -0500137 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500138 uint32_t src[9];
139 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500140
Mike Klein10fc1e62020-04-13 11:57:05 -0500141 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500142 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
143 src[i] = 0xbb007733;
144 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500145 }
Mike Klein9977efa2019-07-15 12:22:36 -0500146
147 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
148
149 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
150
151 // dst is probably 0xff2dad72.
152 for (auto got : dst) {
153 auto want = expected;
154 for (int i = 0; i < 4; i++) {
155 uint8_t d = got & 0xff,
156 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500157 if (abs(d-w) >= 2) {
158 SkDebugf("d %02x, w %02x\n", d,w);
159 }
Mike Klein9977efa2019-07-15 12:22:36 -0500160 REPORTER_ASSERT(r, abs(d-w) < 2);
161 got >>= 8;
162 want >>= 8;
163 }
164 }
165 });
Mike Klein3f593792019-06-12 12:54:52 -0500166 };
Mike Klein68c50d02019-05-29 12:57:54 -0500167
Mike Klein37607d42019-07-18 10:17:28 -0500168 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
169 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500170
Mike Klein10fc1e62020-04-13 11:57:05 -0500171 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500172 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500173 uint32_t src[9];
174 uint8_t dst[SK_ARRAY_COUNT(src)];
175
176 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
177 src[i] = 0xbb007733;
178 dst[i] = 0x42;
179 }
180
181 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
182 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500183
184 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
185 SkGetPackedG32(over),
186 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500187 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500188
Mike Klein3f593792019-06-12 12:54:52 -0500189 for (auto got : dst) {
190 REPORTER_ASSERT(r, abs(got-want) < 3);
191 }
Mike Klein9977efa2019-07-15 12:22:36 -0500192 });
Mike Klein68c50d02019-05-29 12:57:54 -0500193
Mike Klein10fc1e62020-04-13 11:57:05 -0500194 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500195 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500196 uint8_t src[256],
197 dst[256];
198 for (int i = 0; i < 256; i++) {
199 src[i] = 255 - i;
200 dst[i] = i;
201 }
202
203 program.eval(256, src, dst);
204
205 for (int i = 0; i < 256; i++) {
206 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
207 SkPackARGB32( i, 0,0,0)));
208 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
209 }
Mike Klein9977efa2019-07-15 12:22:36 -0500210 });
Mike Klein68c50d02019-05-29 12:57:54 -0500211}
Mike Klein81756e42019-06-12 11:36:28 -0500212
Mike Klein7542ab52020-04-02 08:50:16 -0500213DEF_TEST(SkVM_eliminate_dead_code, r) {
214 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400215 {
Mike Klein7542ab52020-04-02 08:50:16 -0500216 skvm::Arg arg = b.varying<int>();
217 skvm::I32 l = b.load32(arg);
218 skvm::I32 a = b.add(l, l);
219 b.add(a, b.splat(7));
220 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400221
Mike Klein7542ab52020-04-02 08:50:16 -0500222 std::vector<skvm::Instruction> program = b.program();
223 REPORTER_ASSERT(r, program.size() == 4);
224
Mike Klein5b701e12020-04-02 10:34:24 -0500225 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500226 REPORTER_ASSERT(r, program.size() == 0);
227}
228
229DEF_TEST(SkVM_Usage, r) {
230 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400231 {
Mike Klein7542ab52020-04-02 08:50:16 -0500232 skvm::Arg arg = b.varying<int>(),
233 buf = b.varying<int>();
234 skvm::I32 l = b.load32(arg);
235 skvm::I32 a = b.add(l, l);
236 skvm::I32 s = b.add(a, b.splat(7));
237 b.store32(buf, s);
Herb Derbyf20400e2020-03-18 16:11:25 -0400238 }
Mike Klein7542ab52020-04-02 08:50:16 -0500239
Mike Kleinb7d87902020-04-02 10:14:35 -0500240 skvm::Usage usage{b.program()};
Mike Klein7542ab52020-04-02 08:50:16 -0500241 REPORTER_ASSERT(r, b.program()[0].op == skvm::Op::load32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500242 REPORTER_ASSERT(r, usage[0].size() == 2);
Mike Klein7542ab52020-04-02 08:50:16 -0500243 REPORTER_ASSERT(r, b.program()[1].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500244 REPORTER_ASSERT(r, usage[1].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500245 REPORTER_ASSERT(r, b.program()[2].op == skvm::Op::splat);
Mike Kleinb7d87902020-04-02 10:14:35 -0500246 REPORTER_ASSERT(r, usage[2].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500247 REPORTER_ASSERT(r, b.program()[3].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500248 REPORTER_ASSERT(r, usage[3].size() == 1);
Herb Derbyf20400e2020-03-18 16:11:25 -0400249}
250
Mike Klein9fdadb92019-07-30 12:30:13 -0500251DEF_TEST(SkVM_Pointless, r) {
252 // Let's build a program with no memory arguments.
253 // It should all be pegged as dead code, but we should be able to "run" it.
254 skvm::Builder b;
255 {
256 b.add(b.splat(5.0f),
257 b.splat(4.0f));
258 }
259
Mike Klein10fc1e62020-04-13 11:57:05 -0500260 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500261 for (int N = 0; N < 64; N++) {
262 program.eval(N);
263 }
264 });
265
Mike Kleined9b1f12020-02-06 13:02:32 -0600266 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500267 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500268 }
269}
270
Mike Klein10fc1e62020-04-13 11:57:05 -0500271DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600272 skvm::Builder b;
273 b.store32(b.varying<int>(), b.splat(42));
274
Mike Klein10fc1e62020-04-13 11:57:05 -0500275 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
276 int buf[18];
277 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -0600278
Mike Klein10fc1e62020-04-13 11:57:05 -0500279 p.eval(17, buf);
280 for (int i = 0; i < 17; i++) {
281 REPORTER_ASSERT(r, buf[i] == 42);
282 }
283 REPORTER_ASSERT(r, buf[17] == 47);
284 });
Mike Kleinb6149312020-02-26 13:04:23 -0600285}
Mike Klein11efa182020-02-27 12:04:37 -0600286
Mike Klein10fc1e62020-04-13 11:57:05 -0500287DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -0600288 skvm::Builder b;
289 {
290 auto src = b.varying<int>(),
291 dst = b.varying<int>();
292 b.store32(dst, b.load32(src));
293 }
294
Mike Klein10fc1e62020-04-13 11:57:05 -0500295 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
296 int src[] = {1,2,3,4,5,6,7,8,9},
297 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -0600298
Mike Klein10fc1e62020-04-13 11:57:05 -0500299 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
300 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
301 REPORTER_ASSERT(r, dst[i] == src[i]);
302 }
303 size_t i = SK_ARRAY_COUNT(src)-1;
304 REPORTER_ASSERT(r, dst[i] == 0);
305 });
Mike Klein11efa182020-02-27 12:04:37 -0600306}
Mike Kleinb6149312020-02-26 13:04:23 -0600307
Mike Klein81756e42019-06-12 11:36:28 -0500308DEF_TEST(SkVM_LoopCounts, r) {
309 // Make sure we cover all the exact N we want.
310
Mike Klein9977efa2019-07-15 12:22:36 -0500311 // buf[i] += 1
312 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500313 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500314 b.store32(arg,
315 b.add(b.splat(1),
316 b.load32(arg)));
317
Mike Klein10fc1e62020-04-13 11:57:05 -0500318 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500319 int buf[64];
320 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500321 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
322 buf[i] = i;
323 }
324 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500325
Mike Klein9977efa2019-07-15 12:22:36 -0500326 for (int i = 0; i < N; i++) {
327 REPORTER_ASSERT(r, buf[i] == i+1);
328 }
329 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
330 REPORTER_ASSERT(r, buf[i] == i);
331 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500332 }
333 });
Mike Klein81756e42019-06-12 11:36:28 -0500334}
Mike Klein05642042019-06-18 12:16:06 -0500335
Mike Kleinb2b6a992020-01-13 16:34:30 -0600336DEF_TEST(SkVM_gather32, r) {
337 skvm::Builder b;
338 {
339 skvm::Arg uniforms = b.uniform(),
340 buf = b.varying<int>();
341 skvm::I32 x = b.load32(buf);
342 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
343 }
344
Mike Klein10fc1e62020-04-13 11:57:05 -0500345 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600346 const int img[] = {12,34,56,78, 90,98,76,54};
347
348 int buf[20];
349 for (int i = 0; i < 20; i++) {
350 buf[i] = i;
351 }
352
353 struct Uniforms {
354 const int* img;
355 } uniforms{img};
356
357 program.eval(20, &uniforms, buf);
358 int i = 0;
359 REPORTER_ASSERT(r, buf[i] == 12); i++;
360 REPORTER_ASSERT(r, buf[i] == 34); i++;
361 REPORTER_ASSERT(r, buf[i] == 56); i++;
362 REPORTER_ASSERT(r, buf[i] == 78); i++;
363 REPORTER_ASSERT(r, buf[i] == 90); i++;
364 REPORTER_ASSERT(r, buf[i] == 98); i++;
365 REPORTER_ASSERT(r, buf[i] == 76); i++;
366 REPORTER_ASSERT(r, buf[i] == 54); i++;
367
368 REPORTER_ASSERT(r, buf[i] == 12); i++;
369 REPORTER_ASSERT(r, buf[i] == 34); i++;
370 REPORTER_ASSERT(r, buf[i] == 56); i++;
371 REPORTER_ASSERT(r, buf[i] == 78); i++;
372 REPORTER_ASSERT(r, buf[i] == 90); i++;
373 REPORTER_ASSERT(r, buf[i] == 98); i++;
374 REPORTER_ASSERT(r, buf[i] == 76); i++;
375 REPORTER_ASSERT(r, buf[i] == 54); i++;
376
377 REPORTER_ASSERT(r, buf[i] == 12); i++;
378 REPORTER_ASSERT(r, buf[i] == 34); i++;
379 REPORTER_ASSERT(r, buf[i] == 56); i++;
380 REPORTER_ASSERT(r, buf[i] == 78); i++;
381 });
382}
383
Mike Klein81d52672019-07-30 11:11:09 -0500384DEF_TEST(SkVM_gathers, r) {
385 skvm::Builder b;
386 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600387 skvm::Arg uniforms = b.uniform(),
388 buf32 = b.varying<int>(),
389 buf16 = b.varying<uint16_t>(),
390 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500391
392 skvm::I32 x = b.load32(buf32);
393
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600394 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
395 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
396 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500397 }
398
Mike Klein10fc1e62020-04-13 11:57:05 -0500399 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500400 const int img[] = {12,34,56,78, 90,98,76,54};
401
402 constexpr int N = 20;
403 int buf32[N];
404 uint16_t buf16[N];
405 uint8_t buf8 [N];
406
407 for (int i = 0; i < 20; i++) {
408 buf32[i] = i;
409 }
410
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600411 struct Uniforms {
412 const int* img;
413 } uniforms{img};
414
415 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500416 int i = 0;
417 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
418 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
419 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
420 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
421 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
422 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
423 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
424 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
425
426 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
427 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
428 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
429 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
430 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
431 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
432 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
433 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
434
435 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
436 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
437 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
438 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
439 });
440}
441
442DEF_TEST(SkVM_bitops, r) {
443 skvm::Builder b;
444 {
445 skvm::Arg ptr = b.varying<int>();
446
447 skvm::I32 x = b.load32(ptr);
448
Mike Klein4067a942020-04-05 10:25:32 -0500449 x = b.bit_and (x, b.splat(0xf1)); // 0x40
450 x = b.bit_or (x, b.splat(0x80)); // 0xc0
451 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
452 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500453
454 x = b.shl(x, 28); // 0xe000'0000
455 x = b.sra(x, 28); // 0xffff'fffe
456 x = b.shr(x, 1); // 0x7fff'ffff
457
458 b.store32(ptr, x);
459 }
460
Mike Klein10fc1e62020-04-13 11:57:05 -0500461 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500462 int x = 0x42;
463 program.eval(1, &x);
464 REPORTER_ASSERT(r, x == 0x7fff'ffff);
465 });
466}
467
Mike Klein4067a942020-04-05 10:25:32 -0500468DEF_TEST(SkVM_select_is_NaN, r) {
469 skvm::Builder b;
470 {
471 skvm::Arg src = b.varying<float>(),
472 dst = b.varying<float>();
473
474 skvm::F32 x = b.loadF(src);
475 x = select(is_NaN(x), b.splat(0.0f)
476 , x);
477 b.storeF(dst, x);
478 }
479
480 std::vector<skvm::OptimizedInstruction> program = b.optimize();
481 REPORTER_ASSERT(r, program.size() == 4);
482 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
483 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
484 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
485 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
486
Mike Klein10fc1e62020-04-13 11:57:05 -0500487 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500488 // ±NaN, ±0, ±1, ±inf
489 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
490 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
491 uint32_t dst[SK_ARRAY_COUNT(src)];
492 program.eval(SK_ARRAY_COUNT(src), src, dst);
493
494 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
495 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
496 }
497 });
498}
499
Mike Klein81d52672019-07-30 11:11:09 -0500500DEF_TEST(SkVM_f32, r) {
501 skvm::Builder b;
502 {
503 skvm::Arg arg = b.varying<float>();
504
Mike Reedf5ff4c22020-03-23 14:57:53 -0400505 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500506 y = b.add(x,x), // y = 2x
507 z = b.sub(y,x), // z = 2x-x = x
508 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400509 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500510 }
511
Mike Klein10fc1e62020-04-13 11:57:05 -0500512 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500513 float buf[] = { 1,2,3,4,5,6,7,8,9 };
514 program.eval(SK_ARRAY_COUNT(buf), buf);
515 for (float v : buf) {
516 REPORTER_ASSERT(r, v == 1.0f);
517 }
518 });
519}
520
521DEF_TEST(SkVM_cmp_i32, r) {
522 skvm::Builder b;
523 {
524 skvm::I32 x = b.load32(b.varying<int>());
525
526 auto to_bit = [&](int shift, skvm::I32 mask) {
527 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
528 };
529
530 skvm::I32 m = b.splat(0);
531 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
532 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
533 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
534 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
535 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
536 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
537
538 b.store32(b.varying<int>(), m);
539 }
Mike Klein10fc1e62020-04-13 11:57:05 -0500540 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500541 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
542 int out[SK_ARRAY_COUNT(in)];
543
544 program.eval(SK_ARRAY_COUNT(in), in, out);
545
546 REPORTER_ASSERT(r, out[0] == 0b001111);
547 REPORTER_ASSERT(r, out[1] == 0b001100);
548 REPORTER_ASSERT(r, out[2] == 0b001010);
549 REPORTER_ASSERT(r, out[3] == 0b001010);
550 REPORTER_ASSERT(r, out[4] == 0b000010);
551 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
552 REPORTER_ASSERT(r, out[i] == 0b110010);
553 }
554 });
555}
556
557DEF_TEST(SkVM_cmp_f32, r) {
558 skvm::Builder b;
559 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400560 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500561
562 auto to_bit = [&](int shift, skvm::I32 mask) {
563 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
564 };
565
566 skvm::I32 m = b.splat(0);
567 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
568 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
569 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
570 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
571 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
572 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
573
574 b.store32(b.varying<int>(), m);
575 }
576
Mike Klein10fc1e62020-04-13 11:57:05 -0500577 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500578 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
579 int out[SK_ARRAY_COUNT(in)];
580
581 program.eval(SK_ARRAY_COUNT(in), in, out);
582
583 REPORTER_ASSERT(r, out[0] == 0b001111);
584 REPORTER_ASSERT(r, out[1] == 0b001100);
585 REPORTER_ASSERT(r, out[2] == 0b001010);
586 REPORTER_ASSERT(r, out[3] == 0b001010);
587 REPORTER_ASSERT(r, out[4] == 0b000010);
588 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
589 REPORTER_ASSERT(r, out[i] == 0b110010);
590 }
591 });
592}
593
Mike Klein14548b92020-02-28 14:02:29 -0600594DEF_TEST(SkVM_index, r) {
595 skvm::Builder b;
596 b.store32(b.varying<int>(), b.index());
597
Mike Klein10fc1e62020-04-13 11:57:05 -0500598 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600599 int buf[23];
600 program.eval(SK_ARRAY_COUNT(buf), buf);
601 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
602 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
603 }
604 });
605}
606
Mike Klein4a131192019-07-19 13:56:41 -0500607DEF_TEST(SkVM_mad, r) {
608 // This program is designed to exercise the tricky corners of instruction
609 // and register selection for Op::mad_f32.
610
611 skvm::Builder b;
612 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500613 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500614
615 skvm::F32 x = b.to_f32(b.load32(arg)),
616 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
617 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
618 w = b.mad(z,z,y), // w can alias z but not y.
619 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600620 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500621 }
622
Mike Klein10fc1e62020-04-13 11:57:05 -0500623 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500624 int x = 2;
625 program.eval(1, &x);
626 // x = 2
627 // y = 2*2 + 2 = 6
628 // z = 6*6 + 2 = 38
629 // w = 38*38 + 6 = 1450
630 // v = 1450*6 + 1450 = 10150
631 REPORTER_ASSERT(r, x == 10150);
632 });
633}
634
Mike Klein7c0332c2020-03-05 14:18:04 -0600635DEF_TEST(SkVM_fms, r) {
636 // Create a pattern that can be peepholed into an Op::fms_f32.
637 skvm::Builder b;
638 {
639 skvm::Arg arg = b.varying<int>();
640
641 skvm::F32 x = b.to_f32(b.load32(arg)),
642 v = b.sub(b.mul(x, b.splat(2.0f)),
643 b.splat(1.0f));
644 b.store32(arg, b.trunc(v));
645 }
646
Mike Klein10fc1e62020-04-13 11:57:05 -0500647 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600648 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
649 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
650
651 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
652 REPORTER_ASSERT(r, buf[i] = 2*i-1);
653 }
654 });
655}
656
657DEF_TEST(SkVM_fnma, r) {
658 // Create a pattern that can be peepholed into an Op::fnma_f32.
659 skvm::Builder b;
660 {
661 skvm::Arg arg = b.varying<int>();
662
663 skvm::F32 x = b.to_f32(b.load32(arg)),
664 v = b.sub(b.splat(1.0f),
665 b.mul(x, b.splat(2.0f)));
666 b.store32(arg, b.trunc(v));
667 }
668
Mike Klein10fc1e62020-04-13 11:57:05 -0500669 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600670 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
671 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
672
673 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
674 REPORTER_ASSERT(r, buf[i] = 1-2*i);
675 }
676 });
677}
678
Mike Klein81d52672019-07-30 11:11:09 -0500679DEF_TEST(SkVM_madder, r) {
680 skvm::Builder b;
681 {
682 skvm::Arg arg = b.varying<float>();
683
Mike Reedf5ff4c22020-03-23 14:57:53 -0400684 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500685 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
686 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
687 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400688 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500689 }
690
Mike Klein10fc1e62020-04-13 11:57:05 -0500691 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500692 float x = 2.0f;
693 // y = 2*2 + 2 = 6
694 // z = 6*2 + 6 = 18
695 // w = 6*6 + 18 = 54
696 program.eval(1, &x);
697 REPORTER_ASSERT(r, x == 54.0f);
698 });
699}
700
Mike Kleinf22faaf2020-01-09 07:27:39 -0600701DEF_TEST(SkVM_floor, r) {
702 skvm::Builder b;
703 {
704 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400705 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600706 }
707
Mike Klein10fc1e62020-04-13 11:57:05 -0500708 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600709 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
710 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
711 program.eval(SK_ARRAY_COUNT(buf), buf);
712 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
713 REPORTER_ASSERT(r, buf[i] == want[i]);
714 }
715 });
716}
717
Mike Klein5caf7de2020-03-12 11:05:46 -0500718DEF_TEST(SkVM_round, r) {
719 skvm::Builder b;
720 {
721 skvm::Arg src = b.varying<float>();
722 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400723 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500724 }
725
726 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
727 // We haven't explicitly guaranteed that here... it just probably is.
Mike Klein10fc1e62020-04-13 11:57:05 -0500728 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500729 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
730 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
731 int dst[SK_ARRAY_COUNT(buf)];
732
733 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
734 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
735 REPORTER_ASSERT(r, dst[i] == want[i]);
736 }
737 });
738}
739
Herb Derbyc02a41f2020-02-28 14:25:45 -0600740DEF_TEST(SkVM_min, r) {
741 skvm::Builder b;
742 {
743 skvm::Arg src1 = b.varying<float>();
744 skvm::Arg src2 = b.varying<float>();
745 skvm::Arg dst = b.varying<float>();
746
Mike Reedf5ff4c22020-03-23 14:57:53 -0400747 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600748 }
749
Mike Klein10fc1e62020-04-13 11:57:05 -0500750 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600751 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
752 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
753 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
754 float d[SK_ARRAY_COUNT(s1)];
755 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
756 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
757 REPORTER_ASSERT(r, d[i] == want[i]);
758 }
759 });
760}
761
762DEF_TEST(SkVM_max, r) {
763 skvm::Builder b;
764 {
765 skvm::Arg src1 = b.varying<float>();
766 skvm::Arg src2 = b.varying<float>();
767 skvm::Arg dst = b.varying<float>();
768
Mike Reedf5ff4c22020-03-23 14:57:53 -0400769 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600770 }
771
Mike Klein10fc1e62020-04-13 11:57:05 -0500772 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600773 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
774 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
775 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
776 float d[SK_ARRAY_COUNT(s1)];
777 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
778 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
779 REPORTER_ASSERT(r, d[i] == want[i]);
780 }
781 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600782}
783
Mike Kleinf98d0d32019-07-22 14:30:18 -0500784DEF_TEST(SkVM_hoist, r) {
785 // This program uses enough constants that it will fail to JIT if we hoist them.
786 // The JIT will try again without hoisting, and that'll just need 2 registers.
787 skvm::Builder b;
788 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500789 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500790 skvm::I32 x = b.load32(arg);
791 for (int i = 0; i < 32; i++) {
792 x = b.add(x, b.splat(i));
793 }
794 b.store32(arg, x);
795 }
796
Mike Klein10fc1e62020-04-13 11:57:05 -0500797 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500798 int x = 4;
799 program.eval(1, &x);
800 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
801 // x += 496
802 REPORTER_ASSERT(r, x == 500);
803 });
804}
805
Mike Kleinb9944122019-08-02 12:22:39 -0500806DEF_TEST(SkVM_select, r) {
807 skvm::Builder b;
808 {
809 skvm::Arg buf = b.varying<int>();
810
811 skvm::I32 x = b.load32(buf);
812
813 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
814
815 b.store32(buf, x);
816 }
817
Mike Klein10fc1e62020-04-13 11:57:05 -0500818 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500819 int buf[] = { 0,1,2,3,4,5,6,7,8 };
820 program.eval(SK_ARRAY_COUNT(buf), buf);
821 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
822 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
823 }
824 });
825}
826
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500827DEF_TEST(SkVM_NewOps, r) {
828 // Exercise a somewhat arbitrary set of new ops.
829 skvm::Builder b;
830 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500831 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500832 uniforms = b.uniform();
833
834 skvm::I32 x = b.load16(buf);
835
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600836 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500837
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600838 x = b.add(x, b.uniform32(uniforms, kPtr+0));
839 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
840 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
841
842 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500843 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
844 x = b.select(b.gt(x, limit ), limit , x);
845
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600846 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500847
848 b.store16(buf, x);
849 }
850
851 if ((false)) {
852 SkDynamicMemoryWStream buf;
853 dump(b, &buf);
854 sk_sp<SkData> blob = buf.detachAsData();
855 SkDebugf("%.*s\n", blob->size(), blob->data());
856 }
857
Mike Klein10fc1e62020-04-13 11:57:05 -0500858 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500859 const int N = 31;
860 int16_t buf[N];
861 for (int i = 0; i < N; i++) {
862 buf[i] = i;
863 }
864
865 const int M = 16;
866 uint8_t img[M];
867 for (int i = 0; i < M; i++) {
868 img[i] = i*i;
869 }
870
871 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600872 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500873 int add = 5;
874 uint8_t mul = 3;
875 uint16_t sub = 18;
876 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600877 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500878
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600879 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500880
881 for (int i = 0; i < N; i++) {
882 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
883 int x = 3*(i-1);
884
885 // Then that's pinned to the limits of img.
886 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
887 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
888 REPORTER_ASSERT(r, buf[i] == img[x]);
889 }
890 });
891}
892
Mike Klein5a8404c2020-02-28 14:24:56 -0600893DEF_TEST(SkVM_sqrt, r) {
894 skvm::Builder b;
895 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400896 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600897
Mike Klein10fc1e62020-04-13 11:57:05 -0500898 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600899 constexpr int K = 17;
900 float buf[K];
901 for (int i = 0; i < K; i++) {
902 buf[i] = (float)(i*i);
903 }
904
905 // x^2 -> x
906 program.eval(K, buf);
907
908 for (int i = 0; i < K; i++) {
909 REPORTER_ASSERT(r, buf[i] == (float)i);
910 }
911 });
912}
913
Mike Klein3f7c8652019-11-07 10:33:56 -0600914DEF_TEST(SkVM_MSAN, r) {
915 // This little memset32() program should be able to JIT, but if we run that
916 // JIT code in an MSAN build, it won't see the writes initialize buf. So
917 // this tests that we're using the interpreter instead.
918 skvm::Builder b;
919 b.store32(b.varying<int>(), b.splat(42));
920
Mike Klein10fc1e62020-04-13 11:57:05 -0500921 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600922 constexpr int K = 17;
923 int buf[K]; // Intentionally uninitialized.
924 program.eval(K, buf);
925 sk_msan_assert_initialized(buf, buf+K);
926 for (int x : buf) {
927 REPORTER_ASSERT(r, x == 42);
928 }
929 });
930}
931
Mike Klein13601172019-11-08 15:01:02 -0600932DEF_TEST(SkVM_assert, r) {
933 skvm::Builder b;
934 b.assert_true(b.lt(b.load32(b.varying<int>()),
935 b.splat(42)));
936
Mike Klein10fc1e62020-04-13 11:57:05 -0500937 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600938 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600939 program.eval(SK_ARRAY_COUNT(buf), buf);
940 });
941}
942
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600943DEF_TEST(SkVM_premul, reporter) {
944 // Test that premul is short-circuited when alpha is known opaque.
945 {
946 skvm::Builder p;
947 auto rptr = p.varying<int>(),
948 aptr = p.varying<int>();
949
Mike Reedf5ff4c22020-03-23 14:57:53 -0400950 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600951 g = p.splat(0.0f),
952 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400953 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600954
955 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400956 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600957
958 // load red, load alpha, red *= alpha, store red
959 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
960 }
961
962 {
963 skvm::Builder p;
964 auto rptr = p.varying<int>();
965
Mike Reedf5ff4c22020-03-23 14:57:53 -0400966 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600967 g = p.splat(0.0f),
968 b = p.splat(0.0f),
969 a = p.splat(1.0f);
970
971 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400972 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600973
974 // load red, store red
975 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
976 }
977
978 // Same deal for unpremul.
979 {
980 skvm::Builder p;
981 auto rptr = p.varying<int>(),
982 aptr = p.varying<int>();
983
Mike Reedf5ff4c22020-03-23 14:57:53 -0400984 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600985 g = p.splat(0.0f),
986 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400987 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600988
989 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400990 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600991
992 // load red, load alpha, a bunch of unpremul instructions, store red
993 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
994 }
995
996 {
997 skvm::Builder p;
998 auto rptr = p.varying<int>();
999
Mike Reedf5ff4c22020-03-23 14:57:53 -04001000 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001001 g = p.splat(0.0f),
1002 b = p.splat(0.0f),
1003 a = p.splat(1.0f);
1004
1005 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001006 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001007
1008 // load red, store red
1009 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1010 }
1011}
Mike Klein05642042019-06-18 12:16:06 -05001012
Mike Klein05642042019-06-18 12:16:06 -05001013template <typename Fn>
1014static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001015 uint8_t buf[4096];
1016 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001017 fn(a);
1018
1019 REPORTER_ASSERT(r, a.size() == expected.size());
1020
Mike Klein88c0a902019-06-24 15:34:02 -04001021 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001022 want = expected.begin();
1023 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001024 REPORTER_ASSERT(r, got[i] == want[i],
1025 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001026 }
1027}
1028
1029DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001030 // Easiest way to generate test cases is
1031 //
1032 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1033 //
1034 // The -x86-asm-syntax=intel bit is optional, controlling the
1035 // input syntax only; the output will always be AT&T op x,y,dst style.
1036 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1037 // that a bit easier to use here, despite maybe favoring AT&T overall.
1038
1039 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001040 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001041 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001042 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001043 a.vzeroupper();
1044 a.ret();
1045 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001046 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001047 0xc5, 0xf8, 0x77,
1048 0xc3,
1049 });
1050
Mike Klein237dbb42019-07-19 09:44:47 -05001051 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001052 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001053 a.ret();
1054 a.align(4);
1055 },{
1056 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001057 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001058 });
Mike Klein61703a62019-06-18 15:01:12 -05001059
Mike Klein397fc882019-06-20 11:37:10 -05001060 test_asm(r, [&](A& a) {
1061 a.add(A::rax, 8); // Always good to test rax.
1062 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001063
Mike Klein397fc882019-06-20 11:37:10 -05001064 a.add(A::rdi, 12); // Last 0x48 REX
1065 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001066
Mike Klein86a645c2019-07-12 12:29:39 -05001067 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001068 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001069
Mike Klein397fc882019-06-20 11:37:10 -05001070 a.add(A::rsi, 128); // Requires 4 byte immediate.
1071 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -05001072 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001073 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001074 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001075
1076 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001077 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001078
Mike Klein86a645c2019-07-12 12:29:39 -05001079 0x49, 0x83, 0b11'000'000, 0x07,
1080 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001081
1082 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001083 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -05001084 });
Mike Klein397fc882019-06-20 11:37:10 -05001085
1086
1087 test_asm(r, [&](A& a) {
1088 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1089 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1090 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1091 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1092 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1093 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1094 },{
1095 /* VEX */ /*op*/ /*modRM*/
1096 0xc5, 0xf5, 0xfe, 0xc2,
1097 0xc5, 0x75, 0xfe, 0xc2,
1098 0xc5, 0xbd, 0xfe, 0xc2,
1099 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1100 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1101 0xc5, 0xf5, 0xfa, 0xc2,
1102 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001103
1104 test_asm(r, [&](A& a) {
Mike Klein714f8cc2019-11-06 12:54:46 -06001105 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1106 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1107 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1108 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1109 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1110 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Kleinb9944122019-08-02 12:22:39 -05001111 },{
1112 0xc5,0xf5,0x76,0xc2,
1113 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001114 0xc5,0xf4,0xc2,0xc2,0x00,
1115 0xc5,0xf4,0xc2,0xc2,0x01,
1116 0xc5,0xf4,0xc2,0xc2,0x02,
1117 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001118 });
1119
1120 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001121 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1122 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1123 },{
1124 0xc5,0xf4,0x5d,0xc2,
1125 0xc5,0xf4,0x5f,0xc2,
1126 });
1127
1128 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001129 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1130 },{
1131 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1132 });
1133
1134 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001135 a.vpsrld(A::ymm15, A::ymm2, 8);
1136 a.vpsrld(A::ymm0 , A::ymm8, 5);
1137 },{
1138 0xc5, 0x85, 0x72,0xd2, 0x08,
1139 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1140 });
1141
1142 test_asm(r, [&](A& a) {
1143 a.vpermq(A::ymm1, A::ymm2, 5);
1144 },{
1145 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1146 });
Mike Kleine5053412019-06-21 12:37:22 -05001147
1148 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001149 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1150 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1151 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1152 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1153 },{
1154 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1155 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1156 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1157 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1158 });
1159
1160 test_asm(r, [&](A& a) {
Mike Kleine5053412019-06-21 12:37:22 -05001161 A::Label l = a.here();
1162 a.byte(1);
1163 a.byte(2);
1164 a.byte(3);
1165 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001166
Mike Klein65c10b52019-07-12 09:22:21 -05001167 a.vbroadcastss(A::ymm0 , &l);
1168 a.vbroadcastss(A::ymm1 , &l);
1169 a.vbroadcastss(A::ymm8 , &l);
1170 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001171
Mike Klein65c10b52019-07-12 09:22:21 -05001172 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001173 a.vpaddd (A::ymm4, A::ymm3, &l);
1174 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001175
1176 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001177
1178 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001179 },{
1180 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001181
Mike Kleine5053412019-06-21 12:37:22 -05001182 /* VEX */ /*op*/ /* ModRM */ /* offset */
1183 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1184 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1185 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1186 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001187
1188 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001189
1190 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1191 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001192
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001193 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1194
1195 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001196 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001197
1198 test_asm(r, [&](A& a) {
Mike Klein788967e2019-08-02 10:15:51 -05001199 a.vbroadcastss(A::ymm0, A::rdi, 0);
1200 a.vbroadcastss(A::ymm13, A::r14, 7);
1201 a.vbroadcastss(A::ymm8, A::rdx, -12);
1202 a.vbroadcastss(A::ymm8, A::rdx, 400);
Mike Klein94d054b2019-08-02 10:54:23 -05001203
1204 a.vbroadcastss(A::ymm8, A::xmm0);
1205 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001206 },{
1207 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1208 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1209 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1210 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1211 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001212
1213 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1214 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001215 });
1216
1217 test_asm(r, [&](A& a) {
Mike Klein060eaaa2019-06-21 14:42:09 -05001218 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001219 a.jne(&l);
1220 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001221 a.je (&l);
1222 a.jmp(&l);
1223 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001224 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001225
1226 a.cmp(A::rdx, 0);
1227 a.cmp(A::rax, 12);
1228 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001229 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001230 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1231 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1232 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1233 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1234 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001235 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001236
1237 0x48,0x83,0xfa,0x00,
1238 0x48,0x83,0xf8,0x0c,
1239 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001240 });
Mike Klein120d9e82019-06-21 15:52:55 -05001241
1242 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001243 a.vmovups(A::ymm5, A::Mem{A::rsi});
1244 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001245
Mike Klein95529e82019-08-02 11:43:43 -05001246 a.vmovups(A::rsi, A::xmm5);
1247
Mike Kleinedc2dac2020-04-15 16:18:27 -05001248 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1249 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001250
1251 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001252 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001253 /* VEX */ /*Op*/ /* ModRM */
1254 0xc5, 0xfc, 0x10, 0b00'101'110,
1255 0xc5, 0xfc, 0x11, 0b00'101'110,
1256
Mike Klein95529e82019-08-02 11:43:43 -05001257 0xc5, 0xf8, 0x11, 0b00'101'110,
1258
Mike Klein52010b72019-08-02 11:18:00 -05001259 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001260 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001261
1262 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001263 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001264
1265 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001266 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1267 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1268 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001269
Mike Kleinedc2dac2020-04-15 16:18:27 -05001270 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1271 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1272 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001273 },{
1274 0xc5,0xfc,0x10,0x2c,0x24,
1275 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1276 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1277
1278 0xc5,0xfc,0x11,0x2c,0x24,
1279 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1280 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1281 });
1282
1283 test_asm(r, [&](A& a) {
Mike Klein94d054b2019-08-02 10:54:23 -05001284 a.movzbl(A::rax, A::rsi, 0); // Low registers for src and dst.
1285 a.movzbl(A::rax, A::r8, 0); // High src register.
1286 a.movzbl(A::r8 , A::rsi, 0); // High dst register.
1287 a.movzbl(A::r8, A::rsi, 12);
1288 a.movzbl(A::r8, A::rsi, 400);
Mike Klein35b97c32019-07-12 12:32:45 -05001289
Mike Kleincb511042020-04-13 13:12:17 -05001290 a.movzwl(A::rax, A::rsi, 0); // Low registers for src and dst.
1291 a.movzwl(A::rax, A::r8, 0); // High src register.
1292 a.movzwl(A::r8 , A::rsi, 0); // High dst register.
1293 a.movzwl(A::r8, A::rsi, 12);
1294 a.movzwl(A::r8, A::rsi, 400);
1295
Mike Klein35b97c32019-07-12 12:32:45 -05001296 a.vmovd(A::rax, A::xmm0);
1297 a.vmovd(A::rax, A::xmm8);
1298 a.vmovd(A::r8, A::xmm0);
1299
1300 a.vmovd(A::xmm0, A::rax);
1301 a.vmovd(A::xmm8, A::rax);
1302 a.vmovd(A::xmm0, A::r8);
1303
Mike Klein93d3fab2020-01-14 10:46:44 -06001304 a.vmovd(A::xmm0 , A::FOUR, A::rcx, A::rax);
1305 a.vmovd(A::xmm15, A::TWO, A::r8, A::rax);
1306 a.vmovd(A::xmm0 , A::ONE, A::rcx, A::r8);
1307
Mike Klein35b97c32019-07-12 12:32:45 -05001308 a.vmovd_direct(A::rax, A::xmm0);
1309 a.vmovd_direct(A::rax, A::xmm8);
1310 a.vmovd_direct(A::r8, A::xmm0);
1311
1312 a.vmovd_direct(A::xmm0, A::rax);
1313 a.vmovd_direct(A::xmm8, A::rax);
1314 a.vmovd_direct(A::xmm0, A::r8);
1315
1316 a.movb(A::rdx, A::rax);
1317 a.movb(A::rdx, A::r8);
1318 a.movb(A::r8 , A::rax);
1319 },{
Mike Kleincb511042020-04-13 13:12:17 -05001320 0x0f,0xb6,0x06, // movzbl (%rsi), %eax
Mike Klein35b97c32019-07-12 12:32:45 -05001321 0x41,0x0f,0xb6,0x00,
1322 0x44,0x0f,0xb6,0x06,
Mike Klein94d054b2019-08-02 10:54:23 -05001323 0x44,0x0f,0xb6,0x46, 12,
1324 0x44,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
Mike Klein35b97c32019-07-12 12:32:45 -05001325
Mike Kleincb511042020-04-13 13:12:17 -05001326 0x0f,0xb7,0x06, // movzwl (%rsi), %eax
1327 0x41,0x0f,0xb7,0x00,
1328 0x44,0x0f,0xb7,0x06,
1329 0x44,0x0f,0xb7,0x46, 12,
1330 0x44,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
1331
Mike Klein35b97c32019-07-12 12:32:45 -05001332 0xc5,0xf9,0x7e,0x00,
1333 0xc5,0x79,0x7e,0x00,
1334 0xc4,0xc1,0x79,0x7e,0x00,
1335
1336 0xc5,0xf9,0x6e,0x00,
1337 0xc5,0x79,0x6e,0x00,
1338 0xc4,0xc1,0x79,0x6e,0x00,
1339
Mike Klein93d3fab2020-01-14 10:46:44 -06001340 0xc5,0xf9,0x6e,0x04,0x88,
1341 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1342 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1343
Mike Klein35b97c32019-07-12 12:32:45 -05001344 0xc5,0xf9,0x7e,0xc0,
1345 0xc5,0x79,0x7e,0xc0,
1346 0xc4,0xc1,0x79,0x7e,0xc0,
1347
1348 0xc5,0xf9,0x6e,0xc0,
1349 0xc5,0x79,0x6e,0xc0,
1350 0xc4,0xc1,0x79,0x6e,0xc0,
1351
1352 0x88, 0x02,
1353 0x44, 0x88, 0x02,
1354 0x41, 0x88, 0x00,
1355 });
1356
1357 test_asm(r, [&](A& a) {
Mike Klein52010b72019-08-02 11:18:00 -05001358 a.vpinsrw(A::xmm1, A::xmm8, A::rsi, 4);
1359 a.vpinsrw(A::xmm8, A::xmm1, A::r8, 12);
1360
Mike Klein35b97c32019-07-12 12:32:45 -05001361 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
1362 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
1363
Mike Klein95529e82019-08-02 11:43:43 -05001364 a.vpextrw(A::rsi, A::xmm8, 7);
1365 a.vpextrw(A::r8, A::xmm1, 15);
1366
Mike Klein35b97c32019-07-12 12:32:45 -05001367 a.vpextrb(A::rsi, A::xmm8, 7);
1368 a.vpextrb(A::r8, A::xmm1, 15);
1369 },{
Mike Klein52010b72019-08-02 11:18:00 -05001370 0xc5,0xb9, 0xc4, 0x0e, 4,
1371 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1372
Mike Klein35b97c32019-07-12 12:32:45 -05001373 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1374 0xc4,0x43,0x71, 0x20, 0x00, 12,
1375
Mike Klein95529e82019-08-02 11:43:43 -05001376 0xc4,0x63,0x79, 0x15, 0x06, 7,
1377 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1378
Mike Klein35b97c32019-07-12 12:32:45 -05001379 0xc4,0x63,0x79, 0x14, 0x06, 7,
1380 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1381 });
1382
1383 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001384 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1385 },{
1386 0xc5, 0x9d, 0xdf, 0xda,
1387 });
Mike Klein9f4df802019-06-24 18:47:16 -04001388
Mike Kleind4546d62019-07-30 12:15:40 -05001389 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001390 A::Label l;
1391 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1392
1393 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1394 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1395 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1396
1397 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1398 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1399
1400 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1401 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1402 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1403 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1404 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1405
1406 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1407 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1408 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1409
Mike Kleind4546d62019-07-30 12:15:40 -05001410 a.vcvttps2dq(A::ymm3, A::ymm2);
1411 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001412 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001413 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001414 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001415 },{
1416 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001417
1418 0xc5,0xfd,0x6f,0x1e,
1419 0xc5,0xfd,0x6f,0x1c,0x24,
1420 0xc4,0xc1,0x7d,0x6f,0x1b,
1421
1422 0xc5,0xfd,0x6f,0x5e,0x04,
1423 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1424
1425 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1426 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1427 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1428 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1429 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1430
1431 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1432 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1433
1434 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1435
Mike Kleind4546d62019-07-30 12:15:40 -05001436 0xc5,0xfe,0x5b,0xda,
1437 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001438 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001439 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001440 });
1441
Mike Kleinbeaa1082020-01-13 14:04:18 -06001442 test_asm(r, [&](A& a) {
1443 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1444 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1445 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1446 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1447 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1448 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1449 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1450 },{
1451 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1452 0xc4,0xe2,0x75,0x92,0x04,0x10,
1453 0xc4,0x62,0x75,0x92,0x14,0x10,
1454 0xc4,0xa2,0x75,0x92,0x04,0x20,
1455 0xc4,0xc2,0x75,0x92,0x04,0x11,
1456 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1457 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1458 });
1459
Mike Kleinc322f632020-01-13 16:18:58 -06001460 test_asm(r, [&](A& a) {
1461 a.movq(A::rax, A::rdi, 0);
1462 a.movq(A::rax, A::rdi, 1);
1463 a.movq(A::rax, A::rdi, 512);
1464 a.movq(A::r15, A::r13, 42);
1465 a.movq(A::rax, A::r13, 42);
1466 a.movq(A::r15, A::rax, 42);
1467 },{
1468 0x48, 0x8b, 0x07,
1469 0x48, 0x8b, 0x47, 0x01,
1470 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1471 0x4d, 0x8b, 0x7d, 0x2a,
1472 0x49, 0x8b, 0x45, 0x2a,
1473 0x4c, 0x8b, 0x78, 0x2a,
1474 });
1475
Mike Klein9f4df802019-06-24 18:47:16 -04001476 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1477
1478 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001479 a.and16b(A::v4, A::v3, A::v1);
1480 a.orr16b(A::v4, A::v3, A::v1);
1481 a.eor16b(A::v4, A::v3, A::v1);
1482 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001483 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001484 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001485
1486 a.add4s(A::v4, A::v3, A::v1);
1487 a.sub4s(A::v4, A::v3, A::v1);
1488 a.mul4s(A::v4, A::v3, A::v1);
1489
Mike Klein97afd2e2019-10-16 14:11:27 -05001490 a.cmeq4s(A::v4, A::v3, A::v1);
1491 a.cmgt4s(A::v4, A::v3, A::v1);
1492
Mike Klein65809142019-06-25 09:44:02 -04001493 a.sub8h(A::v4, A::v3, A::v1);
1494 a.mul8h(A::v4, A::v3, A::v1);
1495
Mike Klein9f4df802019-06-24 18:47:16 -04001496 a.fadd4s(A::v4, A::v3, A::v1);
1497 a.fsub4s(A::v4, A::v3, A::v1);
1498 a.fmul4s(A::v4, A::v3, A::v1);
1499 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001500 a.fmin4s(A::v4, A::v3, A::v1);
1501 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein7c0332c2020-03-05 14:18:04 -06001502 a.fneg4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001503
Mike Klein65809142019-06-25 09:44:02 -04001504 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001505 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001506
1507 a.fcmeq4s(A::v4, A::v3, A::v1);
1508 a.fcmgt4s(A::v4, A::v3, A::v1);
1509 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001510 },{
Mike Klein65809142019-06-25 09:44:02 -04001511 0x64,0x1c,0x21,0x4e,
1512 0x64,0x1c,0xa1,0x4e,
1513 0x64,0x1c,0x21,0x6e,
1514 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001515 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001516 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001517
1518 0x64,0x84,0xa1,0x4e,
1519 0x64,0x84,0xa1,0x6e,
1520 0x64,0x9c,0xa1,0x4e,
1521
Mike Klein97afd2e2019-10-16 14:11:27 -05001522 0x64,0x8c,0xa1,0x6e,
1523 0x64,0x34,0xa1,0x4e,
1524
Mike Klein65809142019-06-25 09:44:02 -04001525 0x64,0x84,0x61,0x6e,
1526 0x64,0x9c,0x61,0x4e,
1527
Mike Klein9f4df802019-06-24 18:47:16 -04001528 0x64,0xd4,0x21,0x4e,
1529 0x64,0xd4,0xa1,0x4e,
1530 0x64,0xdc,0x21,0x6e,
1531 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001532 0x64,0xf4,0xa1,0x4e,
1533 0x64,0xf4,0x21,0x4e,
Mike Klein7c0332c2020-03-05 14:18:04 -06001534 0x64,0xf8,0xa0,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001535
Mike Klein65809142019-06-25 09:44:02 -04001536 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001537 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001538
1539 0x64,0xe4,0x21,0x4e,
1540 0x64,0xe4,0xa1,0x6e,
1541 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001542 });
1543
1544 test_asm(r, [&](A& a) {
1545 a.shl4s(A::v4, A::v3, 0);
1546 a.shl4s(A::v4, A::v3, 1);
1547 a.shl4s(A::v4, A::v3, 8);
1548 a.shl4s(A::v4, A::v3, 16);
1549 a.shl4s(A::v4, A::v3, 31);
1550
1551 a.sshr4s(A::v4, A::v3, 1);
1552 a.sshr4s(A::v4, A::v3, 8);
1553 a.sshr4s(A::v4, A::v3, 31);
1554
1555 a.ushr4s(A::v4, A::v3, 1);
1556 a.ushr4s(A::v4, A::v3, 8);
1557 a.ushr4s(A::v4, A::v3, 31);
1558
1559 a.ushr8h(A::v4, A::v3, 1);
1560 a.ushr8h(A::v4, A::v3, 8);
1561 a.ushr8h(A::v4, A::v3, 15);
1562 },{
1563 0x64,0x54,0x20,0x4f,
1564 0x64,0x54,0x21,0x4f,
1565 0x64,0x54,0x28,0x4f,
1566 0x64,0x54,0x30,0x4f,
1567 0x64,0x54,0x3f,0x4f,
1568
1569 0x64,0x04,0x3f,0x4f,
1570 0x64,0x04,0x38,0x4f,
1571 0x64,0x04,0x21,0x4f,
1572
1573 0x64,0x04,0x3f,0x6f,
1574 0x64,0x04,0x38,0x6f,
1575 0x64,0x04,0x21,0x6f,
1576
1577 0x64,0x04,0x1f,0x6f,
1578 0x64,0x04,0x18,0x6f,
1579 0x64,0x04,0x11,0x6f,
1580 });
1581
1582 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001583 a.sli4s(A::v4, A::v3, 0);
1584 a.sli4s(A::v4, A::v3, 1);
1585 a.sli4s(A::v4, A::v3, 8);
1586 a.sli4s(A::v4, A::v3, 16);
1587 a.sli4s(A::v4, A::v3, 31);
1588 },{
1589 0x64,0x54,0x20,0x6f,
1590 0x64,0x54,0x21,0x6f,
1591 0x64,0x54,0x28,0x6f,
1592 0x64,0x54,0x30,0x6f,
1593 0x64,0x54,0x3f,0x6f,
1594 });
1595
1596 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001597 a.scvtf4s (A::v4, A::v3);
1598 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001599 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001600 },{
1601 0x64,0xd8,0x21,0x4e,
1602 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001603 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001604 });
Mike Klein15a368d2019-06-26 10:21:12 -04001605
1606 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001607 a.brk(0);
1608 a.brk(65535);
1609
Mike Klein15a368d2019-06-26 10:21:12 -04001610 a.ret(A::x30); // Conventional ret using link register.
1611 a.ret(A::x13); // Can really return using any register if we like.
1612
1613 a.add(A::x2, A::x2, 4);
1614 a.add(A::x3, A::x2, 32);
1615
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001616 a.sub(A::x2, A::x2, 4);
1617 a.sub(A::x3, A::x2, 32);
1618
Mike Klein15a368d2019-06-26 10:21:12 -04001619 a.subs(A::x2, A::x2, 4);
1620 a.subs(A::x3, A::x2, 32);
1621
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001622 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1623 a.cmp(A::x2, 4);
1624
Mike Klein15a368d2019-06-26 10:21:12 -04001625 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001626 a.bne(&l);
1627 a.bne(&l);
1628 a.blt(&l);
1629 a.b(&l);
1630 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001631 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001632 },{
Mike Klein37be7712019-11-13 13:19:01 -06001633 0x00,0x00,0x20,0xd4,
1634 0xe0,0xff,0x3f,0xd4,
1635
Mike Klein15a368d2019-06-26 10:21:12 -04001636 0xc0,0x03,0x5f,0xd6,
1637 0xa0,0x01,0x5f,0xd6,
1638
1639 0x42,0x10,0x00,0x91,
1640 0x43,0x80,0x00,0x91,
1641
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001642 0x42,0x10,0x00,0xd1,
1643 0x43,0x80,0x00,0xd1,
1644
Mike Klein15a368d2019-06-26 10:21:12 -04001645 0x42,0x10,0x00,0xf1,
1646 0x43,0x80,0x00,0xf1,
1647
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001648 0x5f,0x10,0x00,0xf1,
1649 0x5f,0x10,0x00,0xf1,
1650
1651 0x01,0x00,0x00,0x54, // b.ne #0
1652 0xe1,0xff,0xff,0x54, // b.ne #-4
1653 0xcb,0xff,0xff,0x54, // b.lt #-8
1654 0xae,0xff,0xff,0x54, // b.al #-12
1655 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1656 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001657 });
Mike Kleine51632e2019-06-26 14:47:43 -04001658
Mike Kleince7b88c2019-07-11 14:06:40 -05001659 // Can we cbz() to a not-yet-defined label?
1660 test_asm(r, [&](A& a) {
1661 A::Label l;
1662 a.cbz(A::x2, &l);
1663 a.add(A::x3, A::x2, 32);
1664 a.label(&l);
1665 a.ret(A::x30);
1666 },{
1667 0x42,0x00,0x00,0xb4, // cbz x2, #8
1668 0x43,0x80,0x00,0x91, // add x3, x2, #32
1669 0xc0,0x03,0x5f,0xd6, // ret
1670 });
1671
1672 // If we start a label as a backward label,
1673 // can we redefine it to be a future label?
1674 // (Not sure this is useful... just want to test it works.)
1675 test_asm(r, [&](A& a) {
1676 A::Label l1 = a.here();
1677 a.add(A::x3, A::x2, 32);
1678 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1679
1680 A::Label l2 = a.here(); // Start off the same...
1681 a.add(A::x3, A::x2, 32);
1682 a.cbz(A::x2, &l2); // Looks like this will go backward...
1683 a.add(A::x2, A::x2, 4);
1684 a.add(A::x3, A::x2, 32);
1685 a.label(&l2); // But no... actually forward! What a switcheroo!
1686 },{
1687 0x43,0x80,0x00,0x91, // add x3, x2, #32
1688 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1689
1690 0x43,0x80,0x00,0x91, // add x3, x2, #32
1691 0x62,0x00,0x00,0xb4, // cbz x2, #12
1692 0x42,0x10,0x00,0x91, // add x2, x2, #4
1693 0x43,0x80,0x00,0x91, // add x3, x2, #32
1694 });
1695
Mike Klein81d52672019-07-30 11:11:09 -05001696 // Loading from a label on ARM.
1697 test_asm(r, [&](A& a) {
1698 A::Label fore,aft;
1699 a.label(&fore);
1700 a.word(0x01234567);
1701 a.ldrq(A::v1, &fore);
1702 a.ldrq(A::v2, &aft);
1703 a.label(&aft);
1704 a.word(0x76543210);
1705 },{
1706 0x67,0x45,0x23,0x01,
1707 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1708 0x22,0x00,0x00,0x9c, // ldr q2, #4
1709 0x10,0x32,0x54,0x76,
1710 });
1711
Mike Kleine51632e2019-06-26 14:47:43 -04001712 test_asm(r, [&](A& a) {
1713 a.ldrq(A::v0, A::x8);
1714 a.strq(A::v0, A::x8);
1715 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001716 0x00,0x01,0xc0,0x3d,
1717 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001718 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001719
1720 test_asm(r, [&](A& a) {
1721 a.xtns2h(A::v0, A::v0);
1722 a.xtnh2b(A::v0, A::v0);
1723 a.strs (A::v0, A::x0);
1724
1725 a.ldrs (A::v0, A::x0);
1726 a.uxtlb2h(A::v0, A::v0);
1727 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001728
1729 a.uminv4s(A::v3, A::v4);
1730 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001731 },{
1732 0x00,0x28,0x61,0x0e,
1733 0x00,0x28,0x21,0x0e,
1734 0x00,0x00,0x00,0xbd,
1735
1736 0x00,0x00,0x40,0xbd,
1737 0x00,0xa4,0x08,0x2f,
1738 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001739
1740 0x83,0xa8,0xb1,0x6e,
1741 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001742 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001743
1744 test_asm(r, [&](A& a) {
1745 a.ldrb(A::v0, A::x8);
1746 a.strb(A::v0, A::x8);
1747 },{
1748 0x00,0x01,0x40,0x3d,
1749 0x00,0x01,0x00,0x3d,
1750 });
Mike Klein81d52672019-07-30 11:11:09 -05001751
1752 test_asm(r, [&](A& a) {
1753 a.tbl(A::v0, A::v1, A::v2);
1754 },{
1755 0x20,0x00,0x02,0x4e,
1756 });
Mike Klein05642042019-06-18 12:16:06 -05001757}
Mike Reedbcb46c02020-03-23 17:51:01 -04001758
1759DEF_TEST(SkVM_approx_math, r) {
1760 auto eval = [](int N, float values[], auto fn) {
1761 skvm::Builder b;
1762 skvm::Arg inout = b.varying<float>();
1763
1764 b.storeF(inout, fn(&b, b.loadF(inout)));
1765
1766 b.done().eval(N, values);
1767 };
1768
1769 auto compare = [r](int N, const float values[], const float expected[]) {
1770 for (int i = 0; i < N; ++i) {
1771 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1772 }
1773 };
1774
1775 // log2
1776 {
1777 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1778 constexpr int N = SK_ARRAY_COUNT(values);
1779 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1780 return b->approx_log2(v);
1781 });
1782 const float expected[] = {-2, -1, 0, 1, 2, 3};
1783 compare(N, values, expected);
1784 }
1785
1786 // pow2
1787 {
1788 float values[] = {-2, -1, 0, 1, 2, 3};
1789 constexpr int N = SK_ARRAY_COUNT(values);
1790 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1791 return b->approx_pow2(v);
1792 });
1793 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
1794 compare(N, values, expected);
1795 }
1796
1797 // powf -- x^0.5
1798 {
1799 float bases[] = {0, 1, 4, 9, 16};
1800 constexpr int N = SK_ARRAY_COUNT(bases);
1801 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
1802 return b->approx_powf(base, b->splat(0.5f));
1803 });
1804 const float expected[] = {0, 1, 2, 3, 4};
1805 compare(N, bases, expected);
1806 }
1807 // powf -- 3^x
1808 {
1809 float exps[] = {-2, -1, 0, 1, 2};
1810 constexpr int N = SK_ARRAY_COUNT(exps);
1811 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
1812 return b->approx_powf(b->splat(3.0f), exp);
1813 });
1814 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
1815 compare(N, exps, expected);
1816 }
Mike Reed82ff25e2020-04-07 13:51:41 -04001817
Mike Reedd468a162020-04-11 14:14:00 -04001818 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04001819 skvm::Builder b;
1820 skvm::Arg inout = b.varying<float>();
1821 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04001822 float actual = arg;
1823 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04001824
Mike Reedd468a162020-04-11 14:14:00 -04001825 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04001826
1827 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04001828 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04001829 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04001830 }
Mike Reed1b84ef22020-04-13 17:56:24 -04001831 return err;
1832 };
1833
1834 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
1835 skvm::Builder b;
1836 skvm::Arg in0 = b.varying<float>();
1837 skvm::Arg in1 = b.varying<float>();
1838 skvm::Arg out = b.varying<float>();
1839 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
1840 float actual;
1841 b.done().eval(1, &arg0, &arg1, &actual);
1842
1843 float err = std::abs(actual - expected);
1844
1845 if (err > tolerance) {
1846 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
1847 REPORTER_ASSERT(r, true);
1848 }
Mike Reed801ba0d2020-04-10 12:37:36 -04001849 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04001850 };
1851
Mike Reed801ba0d2020-04-10 12:37:36 -04001852 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04001853 {
1854 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04001855 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04001856 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
1857 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
1858 return approx_sin(x);
1859 });
1860 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
1861 return approx_cos(x);
1862 });
1863 }
Mike Reed801ba0d2020-04-10 12:37:36 -04001864
1865 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
1866 // so bring in the domain a little.
1867 constexpr float eps = 0.16f;
1868 float err = 0;
1869 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
1870 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
1871 return approx_tan(x);
1872 });
1873 // try again with some multiples of P, to check our periodicity
1874 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
1875 return approx_tan(x + 3*P);
1876 });
1877 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
1878 return approx_tan(x - 3*P);
1879 });
1880 }
Mike Reedd468a162020-04-11 14:14:00 -04001881 if (0) { SkDebugf("tan error %g\n", err); }
1882 }
1883
1884 // asin, acos, atan
1885 {
1886 constexpr float tol = 0.00175f;
1887 float err = 0;
1888 for (float x = -1; x <= 1; x += 1.0f/64) {
1889 err += test(x, asin(x), tol, [](skvm::F32 x) {
1890 return approx_asin(x);
1891 });
1892 test(x, acos(x), tol, [](skvm::F32 x) {
1893 return approx_acos(x);
1894 });
Mike Reed801ba0d2020-04-10 12:37:36 -04001895 }
Mike Reedd468a162020-04-11 14:14:00 -04001896 if (0) { SkDebugf("asin error %g\n", err); }
1897
1898 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04001899 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04001900 err += test(x, atan(x), tol, [](skvm::F32 x) {
1901 return approx_atan(x);
1902 });
1903 }
1904 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04001905
1906 for (float y = -3; y <= 3; y += 1) {
1907 for (float x = -3; x <= 3; x += 1) {
1908 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
1909 return approx_atan(y,x);
1910 });
1911 }
1912 }
1913 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04001914 }
Mike Reedbcb46c02020-03-23 17:51:01 -04001915}
Mike Klein210288f2020-04-08 11:31:07 -05001916
1917DEF_TEST(SkVM_min_max, r) {
1918 // min() and max() have subtle behavior when one argument is NaN and
1919 // the other isn't. It's not sound to blindly swap their arguments.
1920 //
1921 // All backends must behave like std::min() and std::max(), which are
1922 //
1923 // min(x,y) = y<x ? y : x
1924 // max(x,y) = x<y ? y : x
1925
1926 // ±NaN, ±0, ±1, ±inf
1927 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
1928 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
1929
1930 float f[8];
1931 memcpy(f, bits, sizeof(bits));
1932
1933 auto identical = [&](float x, float y) {
1934 uint32_t X,Y;
1935 memcpy(&X, &x, 4);
1936 memcpy(&Y, &y, 4);
1937 return X == Y;
1938 };
1939
1940 // Test min/max with non-constant x, non-constant y.
1941 // (Whether x and y are varying or uniform shouldn't make any difference.)
1942 {
1943 skvm::Builder b;
1944 {
1945 skvm::Arg src = b.varying<float>(),
1946 mn = b.varying<float>(),
1947 mx = b.varying<float>();
1948
1949 skvm::F32 x = b.loadF(src),
1950 y = b.uniformF(b.uniform(), 0);
1951
1952 b.storeF(mn, b.min(x,y));
1953 b.storeF(mx, b.max(x,y));
1954 }
1955
Mike Klein10fc1e62020-04-13 11:57:05 -05001956 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05001957 float mn[8], mx[8];
1958 for (int i = 0; i < 8; i++) {
1959 // min() and max() everything with f[i].
1960 program.eval(8, f,mn,mx, &f[i]);
1961
1962 for (int j = 0; j < 8; j++) {
1963 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
1964 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
1965 }
1966 }
1967 });
1968 }
1969
1970 // Test each with constant on the right.
1971 for (int i = 0; i < 8; i++) {
1972 skvm::Builder b;
1973 {
1974 skvm::Arg src = b.varying<float>(),
1975 mn = b.varying<float>(),
1976 mx = b.varying<float>();
1977
1978 skvm::F32 x = b.loadF(src),
1979 y = b.splat(f[i]);
1980
1981 b.storeF(mn, b.min(x,y));
1982 b.storeF(mx, b.max(x,y));
1983 }
1984
Mike Klein10fc1e62020-04-13 11:57:05 -05001985 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05001986 float mn[8], mx[8];
1987 program.eval(8, f,mn,mx);
1988 for (int j = 0; j < 8; j++) {
1989 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
1990 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
1991 }
1992 });
1993 }
1994
1995 // Test each with constant on the left.
1996 for (int i = 0; i < 8; i++) {
1997 skvm::Builder b;
1998 {
1999 skvm::Arg src = b.varying<float>(),
2000 mn = b.varying<float>(),
2001 mx = b.varying<float>();
2002
2003 skvm::F32 x = b.splat(f[i]),
2004 y = b.loadF(src);
2005
2006 b.storeF(mn, b.min(x,y));
2007 b.storeF(mx, b.max(x,y));
2008 }
2009
Mike Klein10fc1e62020-04-13 11:57:05 -05002010 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002011 float mn[8], mx[8];
2012 program.eval(8, f,mn,mx);
2013 for (int j = 0; j < 8; j++) {
2014 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2015 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2016 }
2017 });
2018 }
2019}