blob: 87385d4eeb2189b9784cbcf391ddd0356d3c8b53 [file] [log] [blame]
Mike Klein68c50d02019-05-29 12:57:54 -05001/*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
10#include "src/core/SkVM.h"
11#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050012#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050013#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050014
Mike Klein7b7077c2019-06-03 17:10:59 -050015using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050016const char* fmt_name(Fmt fmt) {
17 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050018 case Fmt::A8: return "A8";
19 case Fmt::G8: return "G8";
20 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050021 }
22 return "";
23}
24
Mike Klein6b4143e2019-09-18 11:49:29 -050025static void dump(skvm::Builder& builder, SkWStream* o) {
26 skvm::Program program = builder.done();
27 builder.dump(o);
28 o->writeText("\n");
29 program.dump(o);
30 o->writeText("\n");
31}
Mike Klein7e650762019-07-02 15:21:11 -050032
Mike Klein9977efa2019-07-15 12:22:36 -050033template <typename Fn>
34static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
35 test((const skvm::Program&) program);
36 program.dropJIT();
37 test((const skvm::Program&) program);
38}
Mike Klein7e650762019-07-02 15:21:11 -050039
Mike Klein68c50d02019-05-29 12:57:54 -050040DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050041 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050042
43 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050044 for (int s = 0; s < 3; s++)
45 for (int d = 0; d < 3; d++) {
46 auto srcFmt = (Fmt)s,
47 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050048 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050049
Mike Klein267f5072019-06-03 16:27:46 -050050 buf.writeText(fmt_name(srcFmt));
51 buf.writeText(" over ");
52 buf.writeText(fmt_name(dstFmt));
53 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050054 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050055 }
Mike Klein68c50d02019-05-29 12:57:54 -050056
Mike Klein7b7077c2019-06-03 17:10:59 -050057 // Write the I32 Srcovers also.
58 {
Mike Kleinaab45b52019-07-02 15:39:23 -050059 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050060 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050061 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050062 }
63 {
Mike Kleinaab45b52019-07-02 15:39:23 -050064 SrcoverBuilder_I32 builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050065 buf.writeText("I32 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050066 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050067 }
68 {
Mike Kleinaab45b52019-07-02 15:39:23 -050069 SrcoverBuilder_I32_SWAR builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050070 buf.writeText("I32 (SWAR) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050071 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050072 }
73
Mike Kleinf9963112019-08-08 15:13:25 -040074 {
75 skvm::Builder b;
76 skvm::Arg arg = b.varying<int>();
77
78 // x and y can both be hoisted,
79 // and x can die at y, while y lives forever.
80 skvm::I32 x = b.splat(1),
81 y = b.add(x, b.splat(2));
82 b.store32(arg, b.mul(b.load32(arg), y));
83
84 skvm::Program program = b.done();
85 REPORTER_ASSERT(r, program.nregs() == 2);
86
87 std::vector<skvm::Builder::Instruction> insts = b.program();
88 REPORTER_ASSERT(r, insts.size() == 6);
89 REPORTER_ASSERT(r, insts[0].hoist && insts[0].death == 2);
90 REPORTER_ASSERT(r, insts[1].hoist && insts[1].death == 2);
91 REPORTER_ASSERT(r, insts[2].hoist && insts[2].death == 6);
92 REPORTER_ASSERT(r, !insts[3].hoist);
93 REPORTER_ASSERT(r, !insts[4].hoist);
94 REPORTER_ASSERT(r, !insts[5].hoist);
95
96 dump(b, &buf);
97
98 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
99 int arg[] = {0,1,2,3,4,5,6,7,8,9};
100
101 program.eval(SK_ARRAY_COUNT(arg), arg);
102
103 for (int i = 0; i < (int)SK_ARRAY_COUNT(arg); i++) {
104 REPORTER_ASSERT(r, arg[i] == i*3);
105 }
106 });
107 }
108
Mike Klein267f5072019-06-03 16:27:46 -0500109 sk_sp<SkData> blob = buf.detachAsData();
110 {
111
112 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Mike Klein77163312019-06-04 13:35:32 -0500113 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
114 if (expected) {
115 if (blob->size() != expected->size()
116 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500117
Mike Klein77163312019-06-04 13:35:32 -0500118 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
119 expected->size(), expected->data(),
120 blob->size(), blob->data());
121 }
122
123 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
124 if (out.isValid()) {
125 out.write(blob->data(), blob->size());
126 }
Mike Klein68c50d02019-05-29 12:57:54 -0500127 }
128 }
129
Mike Klein9977efa2019-07-15 12:22:36 -0500130 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500131 uint32_t src[9];
132 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500133
Mike Klein9977efa2019-07-15 12:22:36 -0500134 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
135 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
136 src[i] = 0xbb007733;
137 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500138 }
Mike Klein9977efa2019-07-15 12:22:36 -0500139
140 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
141
142 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
143
144 // dst is probably 0xff2dad72.
145 for (auto got : dst) {
146 auto want = expected;
147 for (int i = 0; i < 4; i++) {
148 uint8_t d = got & 0xff,
149 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500150 if (abs(d-w) >= 2) {
151 SkDebugf("d %02x, w %02x\n", d,w);
152 }
Mike Klein9977efa2019-07-15 12:22:36 -0500153 REPORTER_ASSERT(r, abs(d-w) < 2);
154 got >>= 8;
155 want >>= 8;
156 }
157 }
158 });
Mike Klein3f593792019-06-12 12:54:52 -0500159 };
Mike Klein68c50d02019-05-29 12:57:54 -0500160
Mike Klein37607d42019-07-18 10:17:28 -0500161 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
162 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
163 test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
164 test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500165
Mike Klein9977efa2019-07-15 12:22:36 -0500166 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
167 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500168 uint32_t src[9];
169 uint8_t dst[SK_ARRAY_COUNT(src)];
170
171 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
172 src[i] = 0xbb007733;
173 dst[i] = 0x42;
174 }
175
176 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
177 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500178
179 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
180 SkGetPackedG32(over),
181 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500182 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500183
Mike Klein3f593792019-06-12 12:54:52 -0500184 for (auto got : dst) {
185 REPORTER_ASSERT(r, abs(got-want) < 3);
186 }
Mike Klein9977efa2019-07-15 12:22:36 -0500187 });
Mike Klein68c50d02019-05-29 12:57:54 -0500188
Mike Klein9977efa2019-07-15 12:22:36 -0500189 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
190 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500191 uint8_t src[256],
192 dst[256];
193 for (int i = 0; i < 256; i++) {
194 src[i] = 255 - i;
195 dst[i] = i;
196 }
197
198 program.eval(256, src, dst);
199
200 for (int i = 0; i < 256; i++) {
201 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
202 SkPackARGB32( i, 0,0,0)));
203 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
204 }
Mike Klein9977efa2019-07-15 12:22:36 -0500205 });
Mike Klein68c50d02019-05-29 12:57:54 -0500206}
Mike Klein81756e42019-06-12 11:36:28 -0500207
Mike Klein9fdadb92019-07-30 12:30:13 -0500208DEF_TEST(SkVM_Pointless, r) {
209 // Let's build a program with no memory arguments.
210 // It should all be pegged as dead code, but we should be able to "run" it.
211 skvm::Builder b;
212 {
213 b.add(b.splat(5.0f),
214 b.splat(4.0f));
215 }
216
217 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
218 for (int N = 0; N < 64; N++) {
219 program.eval(N);
220 }
221 });
222
223 for (const skvm::Builder::Instruction& inst : b.program()) {
224 REPORTER_ASSERT(r, inst.death == 0 && inst.hoist == true);
225 }
226}
227
Mike Klein81756e42019-06-12 11:36:28 -0500228DEF_TEST(SkVM_LoopCounts, r) {
229 // Make sure we cover all the exact N we want.
230
Mike Klein9977efa2019-07-15 12:22:36 -0500231 // buf[i] += 1
232 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500233 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500234 b.store32(arg,
235 b.add(b.splat(1),
236 b.load32(arg)));
237
Mike Klein9e2218a2019-07-19 11:13:42 -0500238 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
239 int buf[64];
240 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500241 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
242 buf[i] = i;
243 }
244 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500245
Mike Klein9977efa2019-07-15 12:22:36 -0500246 for (int i = 0; i < N; i++) {
247 REPORTER_ASSERT(r, buf[i] == i+1);
248 }
249 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
250 REPORTER_ASSERT(r, buf[i] == i);
251 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500252 }
253 });
Mike Klein81756e42019-06-12 11:36:28 -0500254}
Mike Klein05642042019-06-18 12:16:06 -0500255
Mike Klein81d52672019-07-30 11:11:09 -0500256DEF_TEST(SkVM_gathers, r) {
257 skvm::Builder b;
258 {
259 skvm::Arg img = b.uniform(),
260 buf32 = b.varying<int>(),
261 buf16 = b.varying<uint16_t>(),
262 buf8 = b.varying<uint8_t>();
263
264 skvm::I32 x = b.load32(buf32);
265
266 b.store32(buf32, b.gather32(img, b.bit_and(x, b.splat( 7))));
267 b.store16(buf16, b.gather16(img, b.bit_and(x, b.splat(15))));
268 b.store8 (buf8 , b.gather8 (img, b.bit_and(x, b.splat(31))));
269 }
270
271 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
272 const int img[] = {12,34,56,78, 90,98,76,54};
273
274 constexpr int N = 20;
275 int buf32[N];
276 uint16_t buf16[N];
277 uint8_t buf8 [N];
278
279 for (int i = 0; i < 20; i++) {
280 buf32[i] = i;
281 }
282
283 program.eval(N, img, buf32, buf16, buf8);
284 int i = 0;
285 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
286 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
287 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
288 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
289 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
290 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
291 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
292 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
293
294 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
295 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
296 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
297 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
298 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
299 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
300 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
301 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
302
303 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
304 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
305 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
306 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
307 });
308}
309
310DEF_TEST(SkVM_bitops, r) {
311 skvm::Builder b;
312 {
313 skvm::Arg ptr = b.varying<int>();
314
315 skvm::I32 x = b.load32(ptr);
316
317 x = b.bit_and (x, b.splat(0xf1)); // 0x40
318 x = b.bit_or (x, b.splat(0x80)); // 0xc0
319 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
320 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
321
322 x = b.shl(x, 28); // 0xe000'0000
323 x = b.sra(x, 28); // 0xffff'fffe
324 x = b.shr(x, 1); // 0x7fff'ffff
325
326 b.store32(ptr, x);
327 }
328
329 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
330 int x = 0x42;
331 program.eval(1, &x);
332 REPORTER_ASSERT(r, x == 0x7fff'ffff);
333 });
334}
335
336DEF_TEST(SkVM_f32, r) {
337 skvm::Builder b;
338 {
339 skvm::Arg arg = b.varying<float>();
340
341 skvm::F32 x = b.bit_cast(b.load32(arg)),
342 y = b.add(x,x), // y = 2x
343 z = b.sub(y,x), // z = 2x-x = x
344 w = b.div(z,x); // w = x/x = 1
345 b.store32(arg, b.bit_cast(w));
346 }
347
348 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
349 float buf[] = { 1,2,3,4,5,6,7,8,9 };
350 program.eval(SK_ARRAY_COUNT(buf), buf);
351 for (float v : buf) {
352 REPORTER_ASSERT(r, v == 1.0f);
353 }
354 });
355}
356
357DEF_TEST(SkVM_cmp_i32, r) {
358 skvm::Builder b;
359 {
360 skvm::I32 x = b.load32(b.varying<int>());
361
362 auto to_bit = [&](int shift, skvm::I32 mask) {
363 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
364 };
365
366 skvm::I32 m = b.splat(0);
367 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
368 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
369 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
370 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
371 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
372 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
373
374 b.store32(b.varying<int>(), m);
375 }
376
377 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
378 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
379 int out[SK_ARRAY_COUNT(in)];
380
381 program.eval(SK_ARRAY_COUNT(in), in, out);
382
383 REPORTER_ASSERT(r, out[0] == 0b001111);
384 REPORTER_ASSERT(r, out[1] == 0b001100);
385 REPORTER_ASSERT(r, out[2] == 0b001010);
386 REPORTER_ASSERT(r, out[3] == 0b001010);
387 REPORTER_ASSERT(r, out[4] == 0b000010);
388 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
389 REPORTER_ASSERT(r, out[i] == 0b110010);
390 }
391 });
392}
393
394DEF_TEST(SkVM_cmp_f32, r) {
395 skvm::Builder b;
396 {
397 skvm::F32 x = b.bit_cast(b.load32(b.varying<float>()));
398
399 auto to_bit = [&](int shift, skvm::I32 mask) {
400 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
401 };
402
403 skvm::I32 m = b.splat(0);
404 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
405 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
406 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
407 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
408 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
409 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
410
411 b.store32(b.varying<int>(), m);
412 }
413
414 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
415 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
416 int out[SK_ARRAY_COUNT(in)];
417
418 program.eval(SK_ARRAY_COUNT(in), in, out);
419
420 REPORTER_ASSERT(r, out[0] == 0b001111);
421 REPORTER_ASSERT(r, out[1] == 0b001100);
422 REPORTER_ASSERT(r, out[2] == 0b001010);
423 REPORTER_ASSERT(r, out[3] == 0b001010);
424 REPORTER_ASSERT(r, out[4] == 0b000010);
425 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
426 REPORTER_ASSERT(r, out[i] == 0b110010);
427 }
428 });
429}
430
431DEF_TEST(SkVM_i16x2, r) {
432 skvm::Builder b;
433 {
434 skvm::Arg buf = b.varying<int>();
435
436 skvm::I32 x = b.load32(buf),
437 y = b.add_16x2(x,x), // y = 2x
438 z = b.mul_16x2(x,y), // z = 2x^2
439 w = b.sub_16x2(z,x), // w = x(2x-1)
440 v = b.shl_16x2(w,7), // These shifts will be a no-op
441 u = b.sra_16x2(v,7); // for all but x=12 and x=13.
442 b.store32(buf, u);
443 }
444
445 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
446 uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };
447
448 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
449 for (int i = 0; i < 12; i++) {
450 REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
451 }
452 REPORTER_ASSERT(r, buf[12] == 0xff14); // 12*23 = 0x114
453 REPORTER_ASSERT(r, buf[13] == 0xff45); // 13*25 = 0x145
454 });
455}
456
457DEF_TEST(SkVM_cmp_i16, r) {
458 skvm::Builder b;
459 {
460 skvm::Arg buf = b.varying<int>();
461 skvm::I32 x = b.load32(buf);
462
463 auto to_bit = [&](int shift, skvm::I32 mask) {
464 return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
465 };
466
467 skvm::I32 m = b.splat(0);
468 m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
469 m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
470 m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
471 m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
472 m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
473 m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));
474
475 b.store32(buf, m);
476 }
477
478 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
479 int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };
480
481 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
482
483 REPORTER_ASSERT(r, buf[0] == 0b001111);
484 REPORTER_ASSERT(r, buf[1] == 0b001100);
485 REPORTER_ASSERT(r, buf[2] == 0b001010);
486 REPORTER_ASSERT(r, buf[3] == 0b001010);
487 REPORTER_ASSERT(r, buf[4] == 0b000010);
488 for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
489 REPORTER_ASSERT(r, buf[i] == 0b110010);
490 }
491 });
492}
493
494
Mike Klein4a131192019-07-19 13:56:41 -0500495DEF_TEST(SkVM_mad, r) {
496 // This program is designed to exercise the tricky corners of instruction
497 // and register selection for Op::mad_f32.
498
499 skvm::Builder b;
500 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500501 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500502
503 skvm::F32 x = b.to_f32(b.load32(arg)),
504 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
505 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
506 w = b.mad(z,z,y), // w can alias z but not y.
507 v = b.mad(w,y,w); // Got to stop somewhere.
508 b.store32(arg, b.to_i32(v));
509 }
510
511 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
512 int x = 2;
513 program.eval(1, &x);
514 // x = 2
515 // y = 2*2 + 2 = 6
516 // z = 6*6 + 2 = 38
517 // w = 38*38 + 6 = 1450
518 // v = 1450*6 + 1450 = 10150
519 REPORTER_ASSERT(r, x == 10150);
520 });
521}
522
Mike Klein81d52672019-07-30 11:11:09 -0500523DEF_TEST(SkVM_madder, r) {
524 skvm::Builder b;
525 {
526 skvm::Arg arg = b.varying<float>();
527
528 skvm::F32 x = b.bit_cast(b.load32(arg)),
529 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
530 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
531 w = b.mad(y,y,z);
532 b.store32(arg, b.bit_cast(w));
533 }
534
535 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
536 float x = 2.0f;
537 // y = 2*2 + 2 = 6
538 // z = 6*2 + 6 = 18
539 // w = 6*6 + 18 = 54
540 program.eval(1, &x);
541 REPORTER_ASSERT(r, x == 54.0f);
542 });
543}
544
Mike Kleinf98d0d32019-07-22 14:30:18 -0500545DEF_TEST(SkVM_hoist, r) {
546 // This program uses enough constants that it will fail to JIT if we hoist them.
547 // The JIT will try again without hoisting, and that'll just need 2 registers.
548 skvm::Builder b;
549 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500550 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500551 skvm::I32 x = b.load32(arg);
552 for (int i = 0; i < 32; i++) {
553 x = b.add(x, b.splat(i));
554 }
555 b.store32(arg, x);
556 }
557
558 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
559 int x = 4;
560 program.eval(1, &x);
561 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
562 // x += 496
563 REPORTER_ASSERT(r, x == 500);
564 });
565}
566
Mike Kleinb9944122019-08-02 12:22:39 -0500567DEF_TEST(SkVM_select, r) {
568 skvm::Builder b;
569 {
570 skvm::Arg buf = b.varying<int>();
571
572 skvm::I32 x = b.load32(buf);
573
574 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
575
576 b.store32(buf, x);
577 }
578
579 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
580 int buf[] = { 0,1,2,3,4,5,6,7,8 };
581 program.eval(SK_ARRAY_COUNT(buf), buf);
582 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
583 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
584 }
585 });
586}
587
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500588DEF_TEST(SkVM_NewOps, r) {
589 // Exercise a somewhat arbitrary set of new ops.
590 skvm::Builder b;
591 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500592 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500593 img = b.uniform(),
594 uniforms = b.uniform();
595
596 skvm::I32 x = b.load16(buf);
597
598 x = b.add(x, b.uniform32(uniforms, 0));
599 x = b.mul(x, b.uniform8 (uniforms, 4));
600 x = b.sub(x, b.uniform16(uniforms, 6));
601
602 skvm::I32 limit = b.uniform32(uniforms, 8);
603 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
604 x = b.select(b.gt(x, limit ), limit , x);
605
606 x = b.gather8(img, x);
607
608 b.store16(buf, x);
609 }
610
611 if ((false)) {
612 SkDynamicMemoryWStream buf;
613 dump(b, &buf);
614 sk_sp<SkData> blob = buf.detachAsData();
615 SkDebugf("%.*s\n", blob->size(), blob->data());
616 }
617
618 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
619 const int N = 31;
620 int16_t buf[N];
621 for (int i = 0; i < N; i++) {
622 buf[i] = i;
623 }
624
625 const int M = 16;
626 uint8_t img[M];
627 for (int i = 0; i < M; i++) {
628 img[i] = i*i;
629 }
630
631 struct {
632 int add = 5;
633 uint8_t mul = 3;
634 uint16_t sub = 18;
635 int limit = M-1;
636 } uniforms;
637
638 program.eval(N, buf, img, &uniforms);
639
640 for (int i = 0; i < N; i++) {
641 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
642 int x = 3*(i-1);
643
644 // Then that's pinned to the limits of img.
645 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
646 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
647 REPORTER_ASSERT(r, buf[i] == img[x]);
648 }
649 });
650}
651
Mike Klein05642042019-06-18 12:16:06 -0500652
Mike Klein05642042019-06-18 12:16:06 -0500653template <typename Fn>
654static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400655 uint8_t buf[4096];
656 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500657 fn(a);
658
659 REPORTER_ASSERT(r, a.size() == expected.size());
660
Mike Klein88c0a902019-06-24 15:34:02 -0400661 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500662 want = expected.begin();
663 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500664 REPORTER_ASSERT(r, got[i] == want[i],
665 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500666 }
667}
668
669DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500670 // Easiest way to generate test cases is
671 //
672 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
673 //
674 // The -x86-asm-syntax=intel bit is optional, controlling the
675 // input syntax only; the output will always be AT&T op x,y,dst style.
676 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
677 // that a bit easier to use here, despite maybe favoring AT&T overall.
678
679 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500680 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500681 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500682 a.vzeroupper();
683 a.ret();
684 },{
685 0xc5, 0xf8, 0x77,
686 0xc3,
687 });
688
Mike Klein237dbb42019-07-19 09:44:47 -0500689 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -0500690 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500691 a.ret();
692 a.align(4);
693 },{
694 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -0500695 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -0500696 });
Mike Klein61703a62019-06-18 15:01:12 -0500697
Mike Klein397fc882019-06-20 11:37:10 -0500698 test_asm(r, [&](A& a) {
699 a.add(A::rax, 8); // Always good to test rax.
700 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -0500701
Mike Klein397fc882019-06-20 11:37:10 -0500702 a.add(A::rdi, 12); // Last 0x48 REX
703 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -0500704
Mike Klein86a645c2019-07-12 12:29:39 -0500705 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -0500706 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -0500707
Mike Klein397fc882019-06-20 11:37:10 -0500708 a.add(A::rsi, 128); // Requires 4 byte immediate.
709 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -0500710 },{
Mike Kleind3e75a72019-06-18 15:26:08 -0500711 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -0500712 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -0500713
714 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -0500715 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -0500716
Mike Klein86a645c2019-07-12 12:29:39 -0500717 0x49, 0x83, 0b11'000'000, 0x07,
718 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -0500719
720 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -0500721 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -0500722 });
Mike Klein397fc882019-06-20 11:37:10 -0500723
724
725 test_asm(r, [&](A& a) {
726 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
727 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
728 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
729 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
730 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
731 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
732 },{
733 /* VEX */ /*op*/ /*modRM*/
734 0xc5, 0xf5, 0xfe, 0xc2,
735 0xc5, 0x75, 0xfe, 0xc2,
736 0xc5, 0xbd, 0xfe, 0xc2,
737 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
738 0xc4, 0xe2, 0x75, 0x40, 0xc2,
739 0xc5, 0xf5, 0xfa, 0xc2,
740 });
Mike Kleinff0ae812019-06-20 15:03:44 -0500741
742 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -0500743 a.vpcmpeqd(A::ymm0, A::ymm1, A::ymm2);
744 a.vpcmpgtd(A::ymm0, A::ymm1, A::ymm2);
745 },{
746 0xc5,0xf5,0x76,0xc2,
747 0xc5,0xf5,0x66,0xc2,
748 });
749
750 test_asm(r, [&](A& a) {
751 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
752 },{
753 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
754 });
755
756 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -0500757 a.vpsrld(A::ymm15, A::ymm2, 8);
758 a.vpsrld(A::ymm0 , A::ymm8, 5);
759 },{
760 0xc5, 0x85, 0x72,0xd2, 0x08,
761 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
762 });
763
764 test_asm(r, [&](A& a) {
765 a.vpermq(A::ymm1, A::ymm2, 5);
766 },{
767 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
768 });
Mike Kleine5053412019-06-21 12:37:22 -0500769
770 test_asm(r, [&](A& a) {
771 A::Label l = a.here();
772 a.byte(1);
773 a.byte(2);
774 a.byte(3);
775 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -0500776
Mike Klein65c10b52019-07-12 09:22:21 -0500777 a.vbroadcastss(A::ymm0 , &l);
778 a.vbroadcastss(A::ymm1 , &l);
779 a.vbroadcastss(A::ymm8 , &l);
780 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -0500781
Mike Klein65c10b52019-07-12 09:22:21 -0500782 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -0500783 },{
784 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -0500785
Mike Kleine5053412019-06-21 12:37:22 -0500786 /* VEX */ /*op*/ /* ModRM */ /* offset */
787 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
788 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
789 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
790 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -0500791
792 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Kleine5053412019-06-21 12:37:22 -0500793 });
Mike Klein060eaaa2019-06-21 14:42:09 -0500794
795 test_asm(r, [&](A& a) {
Mike Klein788967e2019-08-02 10:15:51 -0500796 a.vbroadcastss(A::ymm0, A::rdi, 0);
797 a.vbroadcastss(A::ymm13, A::r14, 7);
798 a.vbroadcastss(A::ymm8, A::rdx, -12);
799 a.vbroadcastss(A::ymm8, A::rdx, 400);
Mike Klein94d054b2019-08-02 10:54:23 -0500800
801 a.vbroadcastss(A::ymm8, A::xmm0);
802 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -0500803 },{
804 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
805 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
806 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
807 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
808 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -0500809
810 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
811 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -0500812 });
813
814 test_asm(r, [&](A& a) {
Mike Klein060eaaa2019-06-21 14:42:09 -0500815 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500816 a.jne(&l);
817 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -0500818 a.je (&l);
819 a.jmp(&l);
820 a.jl (&l);
821
822 a.cmp(A::rdx, 0);
823 a.cmp(A::rax, 12);
824 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -0500825 },{
Mike Klein35b97c32019-07-12 12:32:45 -0500826 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
827 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
828 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
829 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
830 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
831
832 0x48,0x83,0xfa,0x00,
833 0x48,0x83,0xf8,0x0c,
834 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -0500835 });
Mike Klein120d9e82019-06-21 15:52:55 -0500836
837 test_asm(r, [&](A& a) {
838 a.vmovups(A::ymm5, A::rsi);
839 a.vmovups(A::rsi, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -0500840
Mike Klein95529e82019-08-02 11:43:43 -0500841 a.vmovups(A::rsi, A::xmm5);
842
Mike Klein52010b72019-08-02 11:18:00 -0500843 a.vpmovzxwd(A::ymm4, A::rsi);
Mike Kleinae51aa32019-06-21 16:06:03 -0500844 a.vpmovzxbd(A::ymm4, A::rsi);
Mike Kleinf3881b22019-06-21 16:20:24 -0500845
846 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -0500847 },{
Mike Kleinae51aa32019-06-21 16:06:03 -0500848 /* VEX */ /*Op*/ /* ModRM */
849 0xc5, 0xfc, 0x10, 0b00'101'110,
850 0xc5, 0xfc, 0x11, 0b00'101'110,
851
Mike Klein95529e82019-08-02 11:43:43 -0500852 0xc5, 0xf8, 0x11, 0b00'101'110,
853
Mike Klein52010b72019-08-02 11:18:00 -0500854 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -0500855 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -0500856
857 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -0500858 });
Mike Klein2b7b2a22019-06-23 20:35:28 -0400859
860 test_asm(r, [&](A& a) {
Mike Klein94d054b2019-08-02 10:54:23 -0500861 a.movzbl(A::rax, A::rsi, 0); // Low registers for src and dst.
862 a.movzbl(A::rax, A::r8, 0); // High src register.
863 a.movzbl(A::r8 , A::rsi, 0); // High dst register.
864 a.movzbl(A::r8, A::rsi, 12);
865 a.movzbl(A::r8, A::rsi, 400);
Mike Klein35b97c32019-07-12 12:32:45 -0500866
867 a.vmovd(A::rax, A::xmm0);
868 a.vmovd(A::rax, A::xmm8);
869 a.vmovd(A::r8, A::xmm0);
870
871 a.vmovd(A::xmm0, A::rax);
872 a.vmovd(A::xmm8, A::rax);
873 a.vmovd(A::xmm0, A::r8);
874
875 a.vmovd_direct(A::rax, A::xmm0);
876 a.vmovd_direct(A::rax, A::xmm8);
877 a.vmovd_direct(A::r8, A::xmm0);
878
879 a.vmovd_direct(A::xmm0, A::rax);
880 a.vmovd_direct(A::xmm8, A::rax);
881 a.vmovd_direct(A::xmm0, A::r8);
882
883 a.movb(A::rdx, A::rax);
884 a.movb(A::rdx, A::r8);
885 a.movb(A::r8 , A::rax);
886 },{
887 0x0f,0xb6,0x06,
888 0x41,0x0f,0xb6,0x00,
889 0x44,0x0f,0xb6,0x06,
Mike Klein94d054b2019-08-02 10:54:23 -0500890 0x44,0x0f,0xb6,0x46, 12,
891 0x44,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
Mike Klein35b97c32019-07-12 12:32:45 -0500892
893 0xc5,0xf9,0x7e,0x00,
894 0xc5,0x79,0x7e,0x00,
895 0xc4,0xc1,0x79,0x7e,0x00,
896
897 0xc5,0xf9,0x6e,0x00,
898 0xc5,0x79,0x6e,0x00,
899 0xc4,0xc1,0x79,0x6e,0x00,
900
901 0xc5,0xf9,0x7e,0xc0,
902 0xc5,0x79,0x7e,0xc0,
903 0xc4,0xc1,0x79,0x7e,0xc0,
904
905 0xc5,0xf9,0x6e,0xc0,
906 0xc5,0x79,0x6e,0xc0,
907 0xc4,0xc1,0x79,0x6e,0xc0,
908
909 0x88, 0x02,
910 0x44, 0x88, 0x02,
911 0x41, 0x88, 0x00,
912 });
913
914 test_asm(r, [&](A& a) {
Mike Klein52010b72019-08-02 11:18:00 -0500915 a.vpinsrw(A::xmm1, A::xmm8, A::rsi, 4);
916 a.vpinsrw(A::xmm8, A::xmm1, A::r8, 12);
917
Mike Klein35b97c32019-07-12 12:32:45 -0500918 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
919 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
920
Mike Klein95529e82019-08-02 11:43:43 -0500921 a.vpextrw(A::rsi, A::xmm8, 7);
922 a.vpextrw(A::r8, A::xmm1, 15);
923
Mike Klein35b97c32019-07-12 12:32:45 -0500924 a.vpextrb(A::rsi, A::xmm8, 7);
925 a.vpextrb(A::r8, A::xmm1, 15);
926 },{
Mike Klein52010b72019-08-02 11:18:00 -0500927 0xc5,0xb9, 0xc4, 0x0e, 4,
928 0xc4,0x41,0x71, 0xc4, 0x00, 12,
929
Mike Klein35b97c32019-07-12 12:32:45 -0500930 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
931 0xc4,0x43,0x71, 0x20, 0x00, 12,
932
Mike Klein95529e82019-08-02 11:43:43 -0500933 0xc4,0x63,0x79, 0x15, 0x06, 7,
934 0xc4,0xc3,0x79, 0x15, 0x08, 15,
935
Mike Klein35b97c32019-07-12 12:32:45 -0500936 0xc4,0x63,0x79, 0x14, 0x06, 7,
937 0xc4,0xc3,0x79, 0x14, 0x08, 15,
938 });
939
940 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -0400941 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
942 },{
943 0xc5, 0x9d, 0xdf, 0xda,
944 });
Mike Klein9f4df802019-06-24 18:47:16 -0400945
Mike Kleind4546d62019-07-30 12:15:40 -0500946 test_asm(r, [&](A& a) {
947 a.vmovdqa (A::ymm3, A::ymm2);
948 a.vcvttps2dq(A::ymm3, A::ymm2);
949 a.vcvtdq2ps (A::ymm3, A::ymm2);
950 },{
951 0xc5,0xfd,0x6f,0xda,
952 0xc5,0xfe,0x5b,0xda,
953 0xc5,0xfc,0x5b,0xda,
954 });
955
Mike Klein9f4df802019-06-24 18:47:16 -0400956 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
957
958 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -0400959 a.and16b(A::v4, A::v3, A::v1);
960 a.orr16b(A::v4, A::v3, A::v1);
961 a.eor16b(A::v4, A::v3, A::v1);
962 a.bic16b(A::v4, A::v3, A::v1);
963
964 a.add4s(A::v4, A::v3, A::v1);
965 a.sub4s(A::v4, A::v3, A::v1);
966 a.mul4s(A::v4, A::v3, A::v1);
967
968 a.sub8h(A::v4, A::v3, A::v1);
969 a.mul8h(A::v4, A::v3, A::v1);
970
Mike Klein9f4df802019-06-24 18:47:16 -0400971 a.fadd4s(A::v4, A::v3, A::v1);
972 a.fsub4s(A::v4, A::v3, A::v1);
973 a.fmul4s(A::v4, A::v3, A::v1);
974 a.fdiv4s(A::v4, A::v3, A::v1);
975
Mike Klein65809142019-06-25 09:44:02 -0400976 a.fmla4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -0400977 },{
Mike Klein65809142019-06-25 09:44:02 -0400978 0x64,0x1c,0x21,0x4e,
979 0x64,0x1c,0xa1,0x4e,
980 0x64,0x1c,0x21,0x6e,
981 0x64,0x1c,0x61,0x4e,
982
983 0x64,0x84,0xa1,0x4e,
984 0x64,0x84,0xa1,0x6e,
985 0x64,0x9c,0xa1,0x4e,
986
987 0x64,0x84,0x61,0x6e,
988 0x64,0x9c,0x61,0x4e,
989
Mike Klein9f4df802019-06-24 18:47:16 -0400990 0x64,0xd4,0x21,0x4e,
991 0x64,0xd4,0xa1,0x4e,
992 0x64,0xdc,0x21,0x6e,
993 0x64,0xfc,0x21,0x6e,
994
Mike Klein65809142019-06-25 09:44:02 -0400995 0x64,0xcc,0x21,0x4e,
996 });
997
998 test_asm(r, [&](A& a) {
999 a.shl4s(A::v4, A::v3, 0);
1000 a.shl4s(A::v4, A::v3, 1);
1001 a.shl4s(A::v4, A::v3, 8);
1002 a.shl4s(A::v4, A::v3, 16);
1003 a.shl4s(A::v4, A::v3, 31);
1004
1005 a.sshr4s(A::v4, A::v3, 1);
1006 a.sshr4s(A::v4, A::v3, 8);
1007 a.sshr4s(A::v4, A::v3, 31);
1008
1009 a.ushr4s(A::v4, A::v3, 1);
1010 a.ushr4s(A::v4, A::v3, 8);
1011 a.ushr4s(A::v4, A::v3, 31);
1012
1013 a.ushr8h(A::v4, A::v3, 1);
1014 a.ushr8h(A::v4, A::v3, 8);
1015 a.ushr8h(A::v4, A::v3, 15);
1016 },{
1017 0x64,0x54,0x20,0x4f,
1018 0x64,0x54,0x21,0x4f,
1019 0x64,0x54,0x28,0x4f,
1020 0x64,0x54,0x30,0x4f,
1021 0x64,0x54,0x3f,0x4f,
1022
1023 0x64,0x04,0x3f,0x4f,
1024 0x64,0x04,0x38,0x4f,
1025 0x64,0x04,0x21,0x4f,
1026
1027 0x64,0x04,0x3f,0x6f,
1028 0x64,0x04,0x38,0x6f,
1029 0x64,0x04,0x21,0x6f,
1030
1031 0x64,0x04,0x1f,0x6f,
1032 0x64,0x04,0x18,0x6f,
1033 0x64,0x04,0x11,0x6f,
1034 });
1035
1036 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001037 a.sli4s(A::v4, A::v3, 0);
1038 a.sli4s(A::v4, A::v3, 1);
1039 a.sli4s(A::v4, A::v3, 8);
1040 a.sli4s(A::v4, A::v3, 16);
1041 a.sli4s(A::v4, A::v3, 31);
1042 },{
1043 0x64,0x54,0x20,0x6f,
1044 0x64,0x54,0x21,0x6f,
1045 0x64,0x54,0x28,0x6f,
1046 0x64,0x54,0x30,0x6f,
1047 0x64,0x54,0x3f,0x6f,
1048 });
1049
1050 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001051 a.scvtf4s (A::v4, A::v3);
1052 a.fcvtzs4s(A::v4, A::v3);
1053 },{
1054 0x64,0xd8,0x21,0x4e,
1055 0x64,0xb8,0xa1,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001056 });
Mike Klein15a368d2019-06-26 10:21:12 -04001057
1058 test_asm(r, [&](A& a) {
1059 a.ret(A::x30); // Conventional ret using link register.
1060 a.ret(A::x13); // Can really return using any register if we like.
1061
1062 a.add(A::x2, A::x2, 4);
1063 a.add(A::x3, A::x2, 32);
1064
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001065 a.sub(A::x2, A::x2, 4);
1066 a.sub(A::x3, A::x2, 32);
1067
Mike Klein15a368d2019-06-26 10:21:12 -04001068 a.subs(A::x2, A::x2, 4);
1069 a.subs(A::x3, A::x2, 32);
1070
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001071 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1072 a.cmp(A::x2, 4);
1073
Mike Klein15a368d2019-06-26 10:21:12 -04001074 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001075 a.bne(&l);
1076 a.bne(&l);
1077 a.blt(&l);
1078 a.b(&l);
1079 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001080 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001081 },{
1082 0xc0,0x03,0x5f,0xd6,
1083 0xa0,0x01,0x5f,0xd6,
1084
1085 0x42,0x10,0x00,0x91,
1086 0x43,0x80,0x00,0x91,
1087
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001088 0x42,0x10,0x00,0xd1,
1089 0x43,0x80,0x00,0xd1,
1090
Mike Klein15a368d2019-06-26 10:21:12 -04001091 0x42,0x10,0x00,0xf1,
1092 0x43,0x80,0x00,0xf1,
1093
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001094 0x5f,0x10,0x00,0xf1,
1095 0x5f,0x10,0x00,0xf1,
1096
1097 0x01,0x00,0x00,0x54, // b.ne #0
1098 0xe1,0xff,0xff,0x54, // b.ne #-4
1099 0xcb,0xff,0xff,0x54, // b.lt #-8
1100 0xae,0xff,0xff,0x54, // b.al #-12
1101 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1102 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001103 });
Mike Kleine51632e2019-06-26 14:47:43 -04001104
Mike Kleince7b88c2019-07-11 14:06:40 -05001105 // Can we cbz() to a not-yet-defined label?
1106 test_asm(r, [&](A& a) {
1107 A::Label l;
1108 a.cbz(A::x2, &l);
1109 a.add(A::x3, A::x2, 32);
1110 a.label(&l);
1111 a.ret(A::x30);
1112 },{
1113 0x42,0x00,0x00,0xb4, // cbz x2, #8
1114 0x43,0x80,0x00,0x91, // add x3, x2, #32
1115 0xc0,0x03,0x5f,0xd6, // ret
1116 });
1117
1118 // If we start a label as a backward label,
1119 // can we redefine it to be a future label?
1120 // (Not sure this is useful... just want to test it works.)
1121 test_asm(r, [&](A& a) {
1122 A::Label l1 = a.here();
1123 a.add(A::x3, A::x2, 32);
1124 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1125
1126 A::Label l2 = a.here(); // Start off the same...
1127 a.add(A::x3, A::x2, 32);
1128 a.cbz(A::x2, &l2); // Looks like this will go backward...
1129 a.add(A::x2, A::x2, 4);
1130 a.add(A::x3, A::x2, 32);
1131 a.label(&l2); // But no... actually forward! What a switcheroo!
1132 },{
1133 0x43,0x80,0x00,0x91, // add x3, x2, #32
1134 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1135
1136 0x43,0x80,0x00,0x91, // add x3, x2, #32
1137 0x62,0x00,0x00,0xb4, // cbz x2, #12
1138 0x42,0x10,0x00,0x91, // add x2, x2, #4
1139 0x43,0x80,0x00,0x91, // add x3, x2, #32
1140 });
1141
Mike Klein81d52672019-07-30 11:11:09 -05001142 // Loading from a label on ARM.
1143 test_asm(r, [&](A& a) {
1144 A::Label fore,aft;
1145 a.label(&fore);
1146 a.word(0x01234567);
1147 a.ldrq(A::v1, &fore);
1148 a.ldrq(A::v2, &aft);
1149 a.label(&aft);
1150 a.word(0x76543210);
1151 },{
1152 0x67,0x45,0x23,0x01,
1153 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1154 0x22,0x00,0x00,0x9c, // ldr q2, #4
1155 0x10,0x32,0x54,0x76,
1156 });
1157
Mike Kleine51632e2019-06-26 14:47:43 -04001158 test_asm(r, [&](A& a) {
1159 a.ldrq(A::v0, A::x8);
1160 a.strq(A::v0, A::x8);
1161 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001162 0x00,0x01,0xc0,0x3d,
1163 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001164 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001165
1166 test_asm(r, [&](A& a) {
1167 a.xtns2h(A::v0, A::v0);
1168 a.xtnh2b(A::v0, A::v0);
1169 a.strs (A::v0, A::x0);
1170
1171 a.ldrs (A::v0, A::x0);
1172 a.uxtlb2h(A::v0, A::v0);
1173 a.uxtlh2s(A::v0, A::v0);
1174 },{
1175 0x00,0x28,0x61,0x0e,
1176 0x00,0x28,0x21,0x0e,
1177 0x00,0x00,0x00,0xbd,
1178
1179 0x00,0x00,0x40,0xbd,
1180 0x00,0xa4,0x08,0x2f,
1181 0x00,0xa4,0x10,0x2f,
1182 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001183
1184 test_asm(r, [&](A& a) {
1185 a.ldrb(A::v0, A::x8);
1186 a.strb(A::v0, A::x8);
1187 },{
1188 0x00,0x01,0x40,0x3d,
1189 0x00,0x01,0x00,0x3d,
1190 });
Mike Klein81d52672019-07-30 11:11:09 -05001191
1192 test_asm(r, [&](A& a) {
1193 a.tbl(A::v0, A::v1, A::v2);
1194 },{
1195 0x20,0x00,0x02,0x4e,
1196 });
Mike Klein05642042019-06-18 12:16:06 -05001197}