blob: 8967928adcd891491ceb195d7f29de8f56358c59 [file] [log] [blame]
Mike Klein68c50d02019-05-29 12:57:54 -05001/*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060010#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050011#include "src/core/SkVM.h"
12#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050013#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050014#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050015
Mike Klein7b7077c2019-06-03 17:10:59 -050016using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050017const char* fmt_name(Fmt fmt) {
18 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050019 case Fmt::A8: return "A8";
20 case Fmt::G8: return "G8";
21 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050022 }
23 return "";
24}
25
Mike Klein6b4143e2019-09-18 11:49:29 -050026static void dump(skvm::Builder& builder, SkWStream* o) {
27 skvm::Program program = builder.done();
28 builder.dump(o);
29 o->writeText("\n");
30 program.dump(o);
31 o->writeText("\n");
32}
Mike Klein7e650762019-07-02 15:21:11 -050033
Mike Kleinb5a30762019-10-16 10:11:56 -050034// TODO: I'd like this to go away and have every test in here run both JIT and interpreter.
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Kleinb5a30762019-10-16 10:11:56 -050036static void test_interpreter_only(skiatest::Reporter* r, skvm::Program&& program, Fn&& test) {
Mike Kleinb5a30762019-10-16 10:11:56 -050037 REPORTER_ASSERT(r, !program.hasJIT());
Mike Klein4e115262019-10-16 16:48:52 +000038 test((const skvm::Program&) program);
Mike Klein52435502019-10-16 10:11:56 -050039}
40
Mike Kleinb5a30762019-10-16 10:11:56 -050041template <typename Fn>
42static void test_jit_and_interpreter(skiatest::Reporter* r, skvm::Program&& program, Fn&& test) {
Mike Klein3f7c8652019-11-07 10:33:56 -060043 static const bool can_jit = []{
44 // This is about the simplest program we can write, setting an int buffer to a constant.
45 // If this can't JIT, the platform does not support JITing.
46 skvm::Builder b;
47 b.store32(b.varying<int>(), b.splat(42));
48 skvm::Program p = b.done();
49 return p.hasJIT();
50 }();
51
52 if (can_jit) {
Mike Kleinb5a30762019-10-16 10:11:56 -050053 REPORTER_ASSERT(r, program.hasJIT());
54 test((const skvm::Program&) program);
55 program.dropJIT();
56 }
Mike Kleinb5a30762019-10-16 10:11:56 -050057 test_interpreter_only(r, std::move(program), std::move(test));
58}
59
60
Mike Klein68c50d02019-05-29 12:57:54 -050061DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050062 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050063
64 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050065 for (int s = 0; s < 3; s++)
66 for (int d = 0; d < 3; d++) {
67 auto srcFmt = (Fmt)s,
68 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050069 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050070
Mike Klein267f5072019-06-03 16:27:46 -050071 buf.writeText(fmt_name(srcFmt));
72 buf.writeText(" over ");
73 buf.writeText(fmt_name(dstFmt));
74 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050075 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050076 }
Mike Klein68c50d02019-05-29 12:57:54 -050077
Mike Klein7b7077c2019-06-03 17:10:59 -050078 // Write the I32 Srcovers also.
79 {
Mike Kleinaab45b52019-07-02 15:39:23 -050080 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050081 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050082 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050083 }
84 {
Mike Kleinaab45b52019-07-02 15:39:23 -050085 SrcoverBuilder_I32 builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050086 buf.writeText("I32 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050087 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050088 }
89 {
Mike Kleinaab45b52019-07-02 15:39:23 -050090 SrcoverBuilder_I32_SWAR builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050091 buf.writeText("I32 (SWAR) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050092 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050093 }
94
Mike Kleinf9963112019-08-08 15:13:25 -040095 {
96 skvm::Builder b;
97 skvm::Arg arg = b.varying<int>();
98
99 // x and y can both be hoisted,
Mike Klein0f61c122019-10-16 10:46:01 -0500100 // and x can die at y, while y must live for the loop.
Mike Kleinf9963112019-08-08 15:13:25 -0400101 skvm::I32 x = b.splat(1),
102 y = b.add(x, b.splat(2));
103 b.store32(arg, b.mul(b.load32(arg), y));
104
105 skvm::Program program = b.done();
106 REPORTER_ASSERT(r, program.nregs() == 2);
107
108 std::vector<skvm::Builder::Instruction> insts = b.program();
109 REPORTER_ASSERT(r, insts.size() == 6);
Mike Klein0f61c122019-10-16 10:46:01 -0500110 REPORTER_ASSERT(r, insts[0].can_hoist && insts[0].death == 2 && !insts[0].used_in_loop);
111 REPORTER_ASSERT(r, insts[1].can_hoist && insts[1].death == 2 && !insts[1].used_in_loop);
112 REPORTER_ASSERT(r, insts[2].can_hoist && insts[2].death == 4 && insts[2].used_in_loop);
113 REPORTER_ASSERT(r, !insts[3].can_hoist);
114 REPORTER_ASSERT(r, !insts[4].can_hoist);
115 REPORTER_ASSERT(r, !insts[5].can_hoist);
Mike Kleinf9963112019-08-08 15:13:25 -0400116
117 dump(b, &buf);
118
Mike Kleinb5a30762019-10-16 10:11:56 -0500119 test_jit_and_interpreter(r, std::move(program), [&](const skvm::Program& program) {
Mike Kleinf9963112019-08-08 15:13:25 -0400120 int arg[] = {0,1,2,3,4,5,6,7,8,9};
121
122 program.eval(SK_ARRAY_COUNT(arg), arg);
123
124 for (int i = 0; i < (int)SK_ARRAY_COUNT(arg); i++) {
125 REPORTER_ASSERT(r, arg[i] == i*3);
126 }
127 });
128 }
129
Mike Kleind48488b2019-10-22 12:27:58 -0500130 {
131 // Demonstrate the value of program reordering.
132 skvm::Builder b;
133 skvm::Arg sp = b.varying<int>(),
134 dp = b.varying<int>();
135
136 skvm::I32 byte = b.splat(0xff);
137
138 skvm::I32 src = b.load32(sp),
139 sr = b.extract(src, 0, byte),
140 sg = b.extract(src, 8, byte),
141 sb = b.extract(src, 16, byte),
142 sa = b.extract(src, 24, byte);
143
144 skvm::I32 dst = b.load32(dp),
145 dr = b.extract(dst, 0, byte),
146 dg = b.extract(dst, 8, byte),
147 db = b.extract(dst, 16, byte),
148 da = b.extract(dst, 24, byte);
149
150 skvm::I32 R = b.add(sr, dr),
151 G = b.add(sg, dg),
152 B = b.add(sb, db),
153 A = b.add(sa, da);
154
155 skvm::I32 rg = b.pack(R, G, 8),
156 ba = b.pack(B, A, 8),
157 rgba = b.pack(rg, ba, 16);
158
159 b.store32(dp, rgba);
160
161 dump(b, &buf);
162 }
163
Mike Klein17e27142019-11-13 12:14:00 -0600164#if defined(SK_CPU_X86)
Mike Klein267f5072019-06-03 16:27:46 -0500165 sk_sp<SkData> blob = buf.detachAsData();
166 {
167
168 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Mike Klein77163312019-06-04 13:35:32 -0500169 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
170 if (expected) {
171 if (blob->size() != expected->size()
172 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500173
Mike Klein77163312019-06-04 13:35:32 -0500174 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
175 expected->size(), expected->data(),
176 blob->size(), blob->data());
177 }
178
179 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
180 if (out.isValid()) {
181 out.write(blob->data(), blob->size());
182 }
Mike Klein68c50d02019-05-29 12:57:54 -0500183 }
184 }
Mike Klein17e27142019-11-13 12:14:00 -0600185#endif
Mike Klein68c50d02019-05-29 12:57:54 -0500186
Mike Klein9977efa2019-07-15 12:22:36 -0500187 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500188 uint32_t src[9];
189 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500190
Mike Klein92ca3ba2020-01-08 15:49:47 -0600191 test_jit_and_interpreter(r, std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500192 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
193 src[i] = 0xbb007733;
194 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500195 }
Mike Klein9977efa2019-07-15 12:22:36 -0500196
197 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
198
199 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
200
201 // dst is probably 0xff2dad72.
202 for (auto got : dst) {
203 auto want = expected;
204 for (int i = 0; i < 4; i++) {
205 uint8_t d = got & 0xff,
206 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500207 if (abs(d-w) >= 2) {
208 SkDebugf("d %02x, w %02x\n", d,w);
209 }
Mike Klein9977efa2019-07-15 12:22:36 -0500210 REPORTER_ASSERT(r, abs(d-w) < 2);
211 got >>= 8;
212 want >>= 8;
213 }
214 }
215 });
Mike Klein3f593792019-06-12 12:54:52 -0500216 };
Mike Klein68c50d02019-05-29 12:57:54 -0500217
Mike Klein37607d42019-07-18 10:17:28 -0500218 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
219 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
220 test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
221 test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500222
Mike Klein92ca3ba2020-01-08 15:49:47 -0600223 test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500224 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500225 uint32_t src[9];
226 uint8_t dst[SK_ARRAY_COUNT(src)];
227
228 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
229 src[i] = 0xbb007733;
230 dst[i] = 0x42;
231 }
232
233 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
234 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500235
236 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
237 SkGetPackedG32(over),
238 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500239 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500240
Mike Klein3f593792019-06-12 12:54:52 -0500241 for (auto got : dst) {
242 REPORTER_ASSERT(r, abs(got-want) < 3);
243 }
Mike Klein9977efa2019-07-15 12:22:36 -0500244 });
Mike Klein68c50d02019-05-29 12:57:54 -0500245
Mike Kleinb5a30762019-10-16 10:11:56 -0500246 test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500247 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500248 uint8_t src[256],
249 dst[256];
250 for (int i = 0; i < 256; i++) {
251 src[i] = 255 - i;
252 dst[i] = i;
253 }
254
255 program.eval(256, src, dst);
256
257 for (int i = 0; i < 256; i++) {
258 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
259 SkPackARGB32( i, 0,0,0)));
260 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
261 }
Mike Klein9977efa2019-07-15 12:22:36 -0500262 });
Mike Klein68c50d02019-05-29 12:57:54 -0500263}
Mike Klein81756e42019-06-12 11:36:28 -0500264
Mike Klein9fdadb92019-07-30 12:30:13 -0500265DEF_TEST(SkVM_Pointless, r) {
266 // Let's build a program with no memory arguments.
267 // It should all be pegged as dead code, but we should be able to "run" it.
268 skvm::Builder b;
269 {
270 b.add(b.splat(5.0f),
271 b.splat(4.0f));
272 }
273
Mike Kleinb5a30762019-10-16 10:11:56 -0500274 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500275 for (int N = 0; N < 64; N++) {
276 program.eval(N);
277 }
278 });
279
280 for (const skvm::Builder::Instruction& inst : b.program()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500281 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500282 }
283}
284
Mike Klein81756e42019-06-12 11:36:28 -0500285DEF_TEST(SkVM_LoopCounts, r) {
286 // Make sure we cover all the exact N we want.
287
Mike Klein9977efa2019-07-15 12:22:36 -0500288 // buf[i] += 1
289 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500290 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500291 b.store32(arg,
292 b.add(b.splat(1),
293 b.load32(arg)));
294
Mike Kleinb5a30762019-10-16 10:11:56 -0500295 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500296 int buf[64];
297 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500298 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
299 buf[i] = i;
300 }
301 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500302
Mike Klein9977efa2019-07-15 12:22:36 -0500303 for (int i = 0; i < N; i++) {
304 REPORTER_ASSERT(r, buf[i] == i+1);
305 }
306 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
307 REPORTER_ASSERT(r, buf[i] == i);
308 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500309 }
310 });
Mike Klein81756e42019-06-12 11:36:28 -0500311}
Mike Klein05642042019-06-18 12:16:06 -0500312
Mike Klein81d52672019-07-30 11:11:09 -0500313DEF_TEST(SkVM_gathers, r) {
314 skvm::Builder b;
315 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600316 skvm::Arg uniforms = b.uniform(),
317 buf32 = b.varying<int>(),
318 buf16 = b.varying<uint16_t>(),
319 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500320
321 skvm::I32 x = b.load32(buf32);
322
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600323 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
324 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
325 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500326 }
327
Mike Kleinb5a30762019-10-16 10:11:56 -0500328 test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500329 const int img[] = {12,34,56,78, 90,98,76,54};
330
331 constexpr int N = 20;
332 int buf32[N];
333 uint16_t buf16[N];
334 uint8_t buf8 [N];
335
336 for (int i = 0; i < 20; i++) {
337 buf32[i] = i;
338 }
339
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600340 struct Uniforms {
341 const int* img;
342 } uniforms{img};
343
344 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500345 int i = 0;
346 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
347 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
348 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
349 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
350 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
351 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
352 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
353 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
354
355 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
356 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
357 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
358 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
359 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
360 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
361 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
362 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
363
364 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
365 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
366 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
367 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
368 });
369}
370
371DEF_TEST(SkVM_bitops, r) {
372 skvm::Builder b;
373 {
374 skvm::Arg ptr = b.varying<int>();
375
376 skvm::I32 x = b.load32(ptr);
377
378 x = b.bit_and (x, b.splat(0xf1)); // 0x40
379 x = b.bit_or (x, b.splat(0x80)); // 0xc0
380 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
381 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
382
383 x = b.shl(x, 28); // 0xe000'0000
384 x = b.sra(x, 28); // 0xffff'fffe
385 x = b.shr(x, 1); // 0x7fff'ffff
386
387 b.store32(ptr, x);
388 }
389
Mike Klein92ca3ba2020-01-08 15:49:47 -0600390 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500391 int x = 0x42;
392 program.eval(1, &x);
393 REPORTER_ASSERT(r, x == 0x7fff'ffff);
394 });
395}
396
397DEF_TEST(SkVM_f32, r) {
398 skvm::Builder b;
399 {
400 skvm::Arg arg = b.varying<float>();
401
402 skvm::F32 x = b.bit_cast(b.load32(arg)),
403 y = b.add(x,x), // y = 2x
404 z = b.sub(y,x), // z = 2x-x = x
405 w = b.div(z,x); // w = x/x = 1
406 b.store32(arg, b.bit_cast(w));
407 }
408
Mike Kleinb5a30762019-10-16 10:11:56 -0500409 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500410 float buf[] = { 1,2,3,4,5,6,7,8,9 };
411 program.eval(SK_ARRAY_COUNT(buf), buf);
412 for (float v : buf) {
413 REPORTER_ASSERT(r, v == 1.0f);
414 }
415 });
416}
417
418DEF_TEST(SkVM_cmp_i32, r) {
419 skvm::Builder b;
420 {
421 skvm::I32 x = b.load32(b.varying<int>());
422
423 auto to_bit = [&](int shift, skvm::I32 mask) {
424 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
425 };
426
427 skvm::I32 m = b.splat(0);
428 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
429 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
430 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
431 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
432 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
433 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
434
435 b.store32(b.varying<int>(), m);
436 }
437
Mike Kleinb5a30762019-10-16 10:11:56 -0500438 test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500439 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
440 int out[SK_ARRAY_COUNT(in)];
441
442 program.eval(SK_ARRAY_COUNT(in), in, out);
443
444 REPORTER_ASSERT(r, out[0] == 0b001111);
445 REPORTER_ASSERT(r, out[1] == 0b001100);
446 REPORTER_ASSERT(r, out[2] == 0b001010);
447 REPORTER_ASSERT(r, out[3] == 0b001010);
448 REPORTER_ASSERT(r, out[4] == 0b000010);
449 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
450 REPORTER_ASSERT(r, out[i] == 0b110010);
451 }
452 });
453}
454
455DEF_TEST(SkVM_cmp_f32, r) {
456 skvm::Builder b;
457 {
458 skvm::F32 x = b.bit_cast(b.load32(b.varying<float>()));
459
460 auto to_bit = [&](int shift, skvm::I32 mask) {
461 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
462 };
463
464 skvm::I32 m = b.splat(0);
465 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
466 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
467 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
468 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
469 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
470 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
471
472 b.store32(b.varying<int>(), m);
473 }
474
Mike Klein92ca3ba2020-01-08 15:49:47 -0600475 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500476 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
477 int out[SK_ARRAY_COUNT(in)];
478
479 program.eval(SK_ARRAY_COUNT(in), in, out);
480
481 REPORTER_ASSERT(r, out[0] == 0b001111);
482 REPORTER_ASSERT(r, out[1] == 0b001100);
483 REPORTER_ASSERT(r, out[2] == 0b001010);
484 REPORTER_ASSERT(r, out[3] == 0b001010);
485 REPORTER_ASSERT(r, out[4] == 0b000010);
486 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
487 REPORTER_ASSERT(r, out[i] == 0b110010);
488 }
489 });
490}
491
492DEF_TEST(SkVM_i16x2, r) {
493 skvm::Builder b;
494 {
495 skvm::Arg buf = b.varying<int>();
496
497 skvm::I32 x = b.load32(buf),
498 y = b.add_16x2(x,x), // y = 2x
499 z = b.mul_16x2(x,y), // z = 2x^2
500 w = b.sub_16x2(z,x), // w = x(2x-1)
501 v = b.shl_16x2(w,7), // These shifts will be a no-op
502 u = b.sra_16x2(v,7); // for all but x=12 and x=13.
503 b.store32(buf, u);
504 }
505
Mike Kleinb5a30762019-10-16 10:11:56 -0500506 test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500507 uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };
508
509 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
510 for (int i = 0; i < 12; i++) {
511 REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
512 }
513 REPORTER_ASSERT(r, buf[12] == 0xff14); // 12*23 = 0x114
514 REPORTER_ASSERT(r, buf[13] == 0xff45); // 13*25 = 0x145
515 });
516}
517
518DEF_TEST(SkVM_cmp_i16, r) {
519 skvm::Builder b;
520 {
521 skvm::Arg buf = b.varying<int>();
522 skvm::I32 x = b.load32(buf);
523
524 auto to_bit = [&](int shift, skvm::I32 mask) {
525 return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
526 };
527
528 skvm::I32 m = b.splat(0);
529 m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
530 m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
531 m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
532 m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
533 m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
534 m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));
535
536 b.store32(buf, m);
537 }
538
Mike Kleinb5a30762019-10-16 10:11:56 -0500539 test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500540 int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };
541
542 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
543
544 REPORTER_ASSERT(r, buf[0] == 0b001111);
545 REPORTER_ASSERT(r, buf[1] == 0b001100);
546 REPORTER_ASSERT(r, buf[2] == 0b001010);
547 REPORTER_ASSERT(r, buf[3] == 0b001010);
548 REPORTER_ASSERT(r, buf[4] == 0b000010);
549 for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
550 REPORTER_ASSERT(r, buf[i] == 0b110010);
551 }
552 });
553}
554
555
Mike Klein4a131192019-07-19 13:56:41 -0500556DEF_TEST(SkVM_mad, r) {
557 // This program is designed to exercise the tricky corners of instruction
558 // and register selection for Op::mad_f32.
559
560 skvm::Builder b;
561 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500562 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500563
564 skvm::F32 x = b.to_f32(b.load32(arg)),
565 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
566 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
567 w = b.mad(z,z,y), // w can alias z but not y.
568 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600569 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500570 }
571
Mike Kleinb5a30762019-10-16 10:11:56 -0500572 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500573 int x = 2;
574 program.eval(1, &x);
575 // x = 2
576 // y = 2*2 + 2 = 6
577 // z = 6*6 + 2 = 38
578 // w = 38*38 + 6 = 1450
579 // v = 1450*6 + 1450 = 10150
580 REPORTER_ASSERT(r, x == 10150);
581 });
582}
583
Mike Klein81d52672019-07-30 11:11:09 -0500584DEF_TEST(SkVM_madder, r) {
585 skvm::Builder b;
586 {
587 skvm::Arg arg = b.varying<float>();
588
589 skvm::F32 x = b.bit_cast(b.load32(arg)),
590 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
591 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
592 w = b.mad(y,y,z);
593 b.store32(arg, b.bit_cast(w));
594 }
595
Mike Kleinb5a30762019-10-16 10:11:56 -0500596 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500597 float x = 2.0f;
598 // y = 2*2 + 2 = 6
599 // z = 6*2 + 6 = 18
600 // w = 6*6 + 18 = 54
601 program.eval(1, &x);
602 REPORTER_ASSERT(r, x == 54.0f);
603 });
604}
605
Mike Kleinf22faaf2020-01-09 07:27:39 -0600606DEF_TEST(SkVM_floor, r) {
607 skvm::Builder b;
608 {
609 skvm::Arg arg = b.varying<float>();
610 b.store32(arg, b.bit_cast(b.floor(b.bit_cast(b.load32(arg)))));
611 }
612
613#if defined(SK_CPU_X86)
614 test_jit_and_interpreter
615#else
616 test_interpreter_only
617#endif
618 (r, b.done(), [&](const skvm::Program& program) {
619 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
620 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
621 program.eval(SK_ARRAY_COUNT(buf), buf);
622 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
623 REPORTER_ASSERT(r, buf[i] == want[i]);
624 }
625 });
626}
627
Mike Kleinf98d0d32019-07-22 14:30:18 -0500628DEF_TEST(SkVM_hoist, r) {
629 // This program uses enough constants that it will fail to JIT if we hoist them.
630 // The JIT will try again without hoisting, and that'll just need 2 registers.
631 skvm::Builder b;
632 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500633 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500634 skvm::I32 x = b.load32(arg);
635 for (int i = 0; i < 32; i++) {
636 x = b.add(x, b.splat(i));
637 }
638 b.store32(arg, x);
639 }
640
Mike Klein0f61c122019-10-16 10:46:01 -0500641 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500642 int x = 4;
643 program.eval(1, &x);
644 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
645 // x += 496
646 REPORTER_ASSERT(r, x == 500);
647 });
648}
649
Mike Kleinb9944122019-08-02 12:22:39 -0500650DEF_TEST(SkVM_select, r) {
651 skvm::Builder b;
652 {
653 skvm::Arg buf = b.varying<int>();
654
655 skvm::I32 x = b.load32(buf);
656
657 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
658
659 b.store32(buf, x);
660 }
661
Mike Klein97afd2e2019-10-16 14:11:27 -0500662 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500663 int buf[] = { 0,1,2,3,4,5,6,7,8 };
664 program.eval(SK_ARRAY_COUNT(buf), buf);
665 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
666 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
667 }
668 });
669}
670
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500671DEF_TEST(SkVM_NewOps, r) {
672 // Exercise a somewhat arbitrary set of new ops.
673 skvm::Builder b;
674 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500675 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500676 uniforms = b.uniform();
677
678 skvm::I32 x = b.load16(buf);
679
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600680 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500681
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600682 x = b.add(x, b.uniform32(uniforms, kPtr+0));
683 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
684 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
685
686 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500687 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
688 x = b.select(b.gt(x, limit ), limit , x);
689
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600690 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500691
692 b.store16(buf, x);
693 }
694
695 if ((false)) {
696 SkDynamicMemoryWStream buf;
697 dump(b, &buf);
698 sk_sp<SkData> blob = buf.detachAsData();
699 SkDebugf("%.*s\n", blob->size(), blob->data());
700 }
701
Mike Kleinb5a30762019-10-16 10:11:56 -0500702 test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500703 const int N = 31;
704 int16_t buf[N];
705 for (int i = 0; i < N; i++) {
706 buf[i] = i;
707 }
708
709 const int M = 16;
710 uint8_t img[M];
711 for (int i = 0; i < M; i++) {
712 img[i] = i*i;
713 }
714
715 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600716 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500717 int add = 5;
718 uint8_t mul = 3;
719 uint16_t sub = 18;
720 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600721 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500722
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600723 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500724
725 for (int i = 0; i < N; i++) {
726 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
727 int x = 3*(i-1);
728
729 // Then that's pinned to the limits of img.
730 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
731 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
732 REPORTER_ASSERT(r, buf[i] == img[x]);
733 }
734 });
735}
736
Mike Klein3f7c8652019-11-07 10:33:56 -0600737DEF_TEST(SkVM_MSAN, r) {
738 // This little memset32() program should be able to JIT, but if we run that
739 // JIT code in an MSAN build, it won't see the writes initialize buf. So
740 // this tests that we're using the interpreter instead.
741 skvm::Builder b;
742 b.store32(b.varying<int>(), b.splat(42));
743
744 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
745 constexpr int K = 17;
746 int buf[K]; // Intentionally uninitialized.
747 program.eval(K, buf);
748 sk_msan_assert_initialized(buf, buf+K);
749 for (int x : buf) {
750 REPORTER_ASSERT(r, x == 42);
751 }
752 });
753}
754
Mike Klein13601172019-11-08 15:01:02 -0600755DEF_TEST(SkVM_assert, r) {
756 skvm::Builder b;
757 b.assert_true(b.lt(b.load32(b.varying<int>()),
758 b.splat(42)));
759
760 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600761 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600762 program.eval(SK_ARRAY_COUNT(buf), buf);
763 });
764}
765
Mike Klein05642042019-06-18 12:16:06 -0500766
Mike Klein05642042019-06-18 12:16:06 -0500767template <typename Fn>
768static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400769 uint8_t buf[4096];
770 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500771 fn(a);
772
773 REPORTER_ASSERT(r, a.size() == expected.size());
774
Mike Klein88c0a902019-06-24 15:34:02 -0400775 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500776 want = expected.begin();
777 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500778 REPORTER_ASSERT(r, got[i] == want[i],
779 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500780 }
781}
782
// Checks the machine-code bytes that skvm::Assembler emits.  Every test_asm()
// call pairs a lambda that emits instructions with the exact byte sequence
// expected back, so the order of calls in each lambda must match the order of
// rows in its byte list.  The x86-64 cases come first; the arm64 cases start
// at the "-arch arm64" note further down.
783DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500784 // Easiest way to generate test cases is
785 //
786 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
787 //
788 // The -x86-asm-syntax=intel bit is optional, controlling the
789 // input syntax only; the output will always be AT&T op x,y,dst style.
790 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
791 // that a bit easier to use here, despite maybe favoring AT&T overall.
792
793 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500794 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500795 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -0600796 a.int3();
Mike Klein05642042019-06-18 12:16:06 -0500797 a.vzeroupper();
798 a.ret();
799 },{
Mike Kleinee5864a2019-11-11 09:16:44 -0600800 0xcc,
Mike Klein05642042019-06-18 12:16:06 -0500801 0xc5, 0xf8, 0x77,
802 0xc3,
803 });
804
Mike Klein237dbb42019-07-19 09:44:47 -0500805 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -0500806 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500807 a.ret();
808 a.align(4);
809 },{
810 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -0500811 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -0500812 });
Mike Klein61703a62019-06-18 15:01:12 -0500813
    // add/sub with an immediate.  In the expected bytes the small immediates
    // use opcode 0x83 with a 1-byte immediate, while 128 and 1000000 use 0x81
    // with a 4-byte little-endian immediate.
Mike Klein397fc882019-06-20 11:37:10 -0500814 test_asm(r, [&](A& a) {
815 a.add(A::rax, 8); // Always good to test rax.
816 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -0500817
Mike Klein397fc882019-06-20 11:37:10 -0500818 a.add(A::rdi, 12); // Last 0x48 REX
819 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -0500820
Mike Klein86a645c2019-07-12 12:29:39 -0500821 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -0500822 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -0500823
Mike Klein397fc882019-06-20 11:37:10 -0500824 a.add(A::rsi, 128); // Requires 4 byte immediate.
825 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -0500826 },{
Mike Kleind3e75a72019-06-18 15:26:08 -0500827 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -0500828 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -0500829
830 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -0500831 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -0500832
Mike Klein86a645c2019-07-12 12:29:39 -0500833 0x49, 0x83, 0b11'000'000, 0x07,
834 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -0500835
836 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -0500837 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -0500838 });
Mike Klein397fc882019-06-20 11:37:10 -0500839
840
841 test_asm(r, [&](A& a) {
842 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
843 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
844 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
845 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
846 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
847 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
848 },{
849 /* VEX */ /*op*/ /*modRM*/
850 0xc5, 0xf5, 0xfe, 0xc2,
851 0xc5, 0x75, 0xfe, 0xc2,
852 0xc5, 0xbd, 0xfe, 0xc2,
853 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
854 0xc4, 0xe2, 0x75, 0x40, 0xc2,
855 0xc5, 0xf5, 0xfa, 0xc2,
856 });
Mike Kleinff0ae812019-06-20 15:03:44 -0500857
    // Integer and float comparisons.  For the vcmp*ps rows the trailing byte
    // of the expected encoding is the predicate immediate:
    // 0x00 eq, 0x01 lt, 0x02 le, 0x04 neq.
858 test_asm(r, [&](A& a) {
Mike Klein714f8cc2019-11-06 12:54:46 -0600859 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
860 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
861 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
862 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
863 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
864 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Kleinb9944122019-08-02 12:22:39 -0500865 },{
866 0xc5,0xf5,0x76,0xc2,
867 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -0600868 0xc5,0xf4,0xc2,0xc2,0x00,
869 0xc5,0xf4,0xc2,0xc2,0x01,
870 0xc5,0xf4,0xc2,0xc2,0x02,
871 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -0500872 });
873
874 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -0600875 a.vminps(A::ymm0, A::ymm1, A::ymm2);
876 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
877 },{
878 0xc5,0xf4,0x5d,0xc2,
879 0xc5,0xf4,0x5f,0xc2,
880 });
881
882 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -0500883 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
884 },{
885 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
886 });
887
888 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -0500889 a.vpsrld(A::ymm15, A::ymm2, 8);
890 a.vpsrld(A::ymm0 , A::ymm8, 5);
891 },{
892 0xc5, 0x85, 0x72,0xd2, 0x08,
893 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
894 });
895
896 test_asm(r, [&](A& a) {
897 a.vpermq(A::ymm1, A::ymm2, 5);
898 },{
899 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
900 });
Mike Kleine5053412019-06-21 12:37:22 -0500901
    // The trailing immediate selects the rounding mode: in the expected bytes
    // NEAREST, FLOOR, CEIL and TRUNC appear as 0x00, 0x01, 0x02 and 0x03.
902 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600903 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
904 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
905 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
906 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
907 },{
908 0xc4,0xe3,0x7d,0x08,0xca,0x00,
909 0xc4,0xe3,0x7d,0x08,0xca,0x01,
910 0xc4,0xe3,0x7d,0x08,0xca,0x02,
911 0xc4,0xe3,0x7d,0x08,0xca,0x03,
912 });
913
    // Label operands.  The label sits at offset 0, ahead of four data bytes,
    // and each expected displacement is measured from the end of its own
    // instruction.  It therefore drops by 9 for every 3-byte-VEX instruction
    // and by 8 for every 2-byte-VEX instruction, so reordering the calls
    // below changes every offset after the moved one.
914 test_asm(r, [&](A& a) {
Mike Kleine5053412019-06-21 12:37:22 -0500915 A::Label l = a.here();
916 a.byte(1);
917 a.byte(2);
918 a.byte(3);
919 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -0500920
Mike Klein65c10b52019-07-12 09:22:21 -0500921 a.vbroadcastss(A::ymm0 , &l);
922 a.vbroadcastss(A::ymm1 , &l);
923 a.vbroadcastss(A::ymm8 , &l);
924 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -0500925
Mike Klein65c10b52019-07-12 09:22:21 -0500926 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -0600927 a.vpaddd (A::ymm4, A::ymm3, &l);
928 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -0600929
930 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600931
932 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -0500933 },{
934 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -0500935
Mike Kleine5053412019-06-21 12:37:22 -0500936 /* VEX */ /*op*/ /* ModRM */ /* offset */
937 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
938 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
939 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
940 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -0500941
942 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -0600943
944 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
945 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -0600946
Mike Klein8c1e0ef2019-11-12 09:07:23 -0600947 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
948
949 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -0500950 });
Mike Klein060eaaa2019-06-21 14:42:09 -0500951
    // Base register plus offset.  In the expected bytes offset 0 has no
    // displacement, 7 and -12 take a 1-byte displacement and 400 a 4-byte
    // one.  The last two rows are the xmm-source forms (ModRM mod bits 11).
952 test_asm(r, [&](A& a) {
Mike Klein788967e2019-08-02 10:15:51 -0500953 a.vbroadcastss(A::ymm0, A::rdi, 0);
954 a.vbroadcastss(A::ymm13, A::r14, 7);
955 a.vbroadcastss(A::ymm8, A::rdx, -12);
956 a.vbroadcastss(A::ymm8, A::rdx, 400);
Mike Klein94d054b2019-08-02 10:54:23 -0500957
958 a.vbroadcastss(A::ymm8, A::xmm0);
959 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -0500960 },{
961 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
962 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
963 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
964 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
965 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -0500966
967 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
968 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -0500969 });
970
    // Backward jumps to a label at offset 0, followed by cmp with immediates.
    // 2000000000 == 0x77359400, which appears below as a 4-byte little-endian
    // immediate.
971 test_asm(r, [&](A& a) {
Mike Klein060eaaa2019-06-21 14:42:09 -0500972 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500973 a.jne(&l);
974 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -0500975 a.je (&l);
976 a.jmp(&l);
977 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -0600978 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -0500979
980 a.cmp(A::rdx, 0);
981 a.cmp(A::rax, 12);
982 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -0500983 },{
Mike Klein35b97c32019-07-12 12:32:45 -0500984 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
985 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
986 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
987 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
988 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -0600989 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -0500990
991 0x48,0x83,0xfa,0x00,
992 0x48,0x83,0xf8,0x0c,
993 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -0500994 });
Mike Klein120d9e82019-06-21 15:52:55 -0500995
996 test_asm(r, [&](A& a) {
997 a.vmovups(A::ymm5, A::rsi);
998 a.vmovups(A::rsi, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -0500999
Mike Klein95529e82019-08-02 11:43:43 -05001000 a.vmovups(A::rsi, A::xmm5);
1001
Mike Klein52010b72019-08-02 11:18:00 -05001002 a.vpmovzxwd(A::ymm4, A::rsi);
Mike Kleinae51aa32019-06-21 16:06:03 -05001003 a.vpmovzxbd(A::ymm4, A::rsi);
Mike Kleinf3881b22019-06-21 16:20:24 -05001004
1005 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001006 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001007 /* VEX */ /*Op*/ /* ModRM */
1008 0xc5, 0xfc, 0x10, 0b00'101'110,
1009 0xc5, 0xfc, 0x11, 0b00'101'110,
1010
Mike Klein95529e82019-08-02 11:43:43 -05001011 0xc5, 0xf8, 0x11, 0b00'101'110,
1012
Mike Klein52010b72019-08-02 11:18:00 -05001013 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001014 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001015
1016 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001017 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001018
    // movzbl with offsets 0, 12 and 400 shows the no-displacement, 1-byte and
    // 4-byte displacement forms.  The vmovd rows end in ModRM 0x00 while the
    // vmovd_direct rows end in ModRM 0xc0 (mod bits 00 versus 11).
1019 test_asm(r, [&](A& a) {
Mike Klein94d054b2019-08-02 10:54:23 -05001020 a.movzbl(A::rax, A::rsi, 0); // Low registers for src and dst.
1021 a.movzbl(A::rax, A::r8, 0); // High src register.
1022 a.movzbl(A::r8 , A::rsi, 0); // High dst register.
1023 a.movzbl(A::r8, A::rsi, 12);
1024 a.movzbl(A::r8, A::rsi, 400);
Mike Klein35b97c32019-07-12 12:32:45 -05001025
1026 a.vmovd(A::rax, A::xmm0);
1027 a.vmovd(A::rax, A::xmm8);
1028 a.vmovd(A::r8, A::xmm0);
1029
1030 a.vmovd(A::xmm0, A::rax);
1031 a.vmovd(A::xmm8, A::rax);
1032 a.vmovd(A::xmm0, A::r8);
1033
1034 a.vmovd_direct(A::rax, A::xmm0);
1035 a.vmovd_direct(A::rax, A::xmm8);
1036 a.vmovd_direct(A::r8, A::xmm0);
1037
1038 a.vmovd_direct(A::xmm0, A::rax);
1039 a.vmovd_direct(A::xmm8, A::rax);
1040 a.vmovd_direct(A::xmm0, A::r8);
1041
1042 a.movb(A::rdx, A::rax);
1043 a.movb(A::rdx, A::r8);
1044 a.movb(A::r8 , A::rax);
1045 },{
1046 0x0f,0xb6,0x06,
1047 0x41,0x0f,0xb6,0x00,
1048 0x44,0x0f,0xb6,0x06,
Mike Klein94d054b2019-08-02 10:54:23 -05001049 0x44,0x0f,0xb6,0x46, 12,
1050 0x44,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
Mike Klein35b97c32019-07-12 12:32:45 -05001051
1052 0xc5,0xf9,0x7e,0x00,
1053 0xc5,0x79,0x7e,0x00,
1054 0xc4,0xc1,0x79,0x7e,0x00,
1055
1056 0xc5,0xf9,0x6e,0x00,
1057 0xc5,0x79,0x6e,0x00,
1058 0xc4,0xc1,0x79,0x6e,0x00,
1059
1060 0xc5,0xf9,0x7e,0xc0,
1061 0xc5,0x79,0x7e,0xc0,
1062 0xc4,0xc1,0x79,0x7e,0xc0,
1063
1064 0xc5,0xf9,0x6e,0xc0,
1065 0xc5,0x79,0x6e,0xc0,
1066 0xc4,0xc1,0x79,0x6e,0xc0,
1067
1068 0x88, 0x02,
1069 0x44, 0x88, 0x02,
1070 0x41, 0x88, 0x00,
1071 });
1072
1073 test_asm(r, [&](A& a) {
Mike Klein52010b72019-08-02 11:18:00 -05001074 a.vpinsrw(A::xmm1, A::xmm8, A::rsi, 4);
1075 a.vpinsrw(A::xmm8, A::xmm1, A::r8, 12);
1076
Mike Klein35b97c32019-07-12 12:32:45 -05001077 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
1078 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
1079
Mike Klein95529e82019-08-02 11:43:43 -05001080 a.vpextrw(A::rsi, A::xmm8, 7);
1081 a.vpextrw(A::r8, A::xmm1, 15);
1082
Mike Klein35b97c32019-07-12 12:32:45 -05001083 a.vpextrb(A::rsi, A::xmm8, 7);
1084 a.vpextrb(A::r8, A::xmm1, 15);
1085 },{
Mike Klein52010b72019-08-02 11:18:00 -05001086 0xc5,0xb9, 0xc4, 0x0e, 4,
1087 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1088
Mike Klein35b97c32019-07-12 12:32:45 -05001089 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1090 0xc4,0x43,0x71, 0x20, 0x00, 12,
1091
Mike Klein95529e82019-08-02 11:43:43 -05001092 0xc4,0x63,0x79, 0x15, 0x06, 7,
1093 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1094
Mike Klein35b97c32019-07-12 12:32:45 -05001095 0xc4,0x63,0x79, 0x14, 0x06, 7,
1096 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1097 });
1098
1099 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001100 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1101 },{
1102 0xc5, 0x9d, 0xdf, 0xda,
1103 });
Mike Klein9f4df802019-06-24 18:47:16 -04001104
Mike Kleind4546d62019-07-30 12:15:40 -05001105 test_asm(r, [&](A& a) {
1106 a.vmovdqa (A::ymm3, A::ymm2);
1107 a.vcvttps2dq(A::ymm3, A::ymm2);
1108 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001109 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleind4546d62019-07-30 12:15:40 -05001110 },{
1111 0xc5,0xfd,0x6f,0xda,
1112 0xc5,0xfe,0x5b,0xda,
1113 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001114 0xc5,0xfd,0x5b,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001115 });
1116
Mike Kleinbeaa1082020-01-13 14:04:18 -06001117 test_asm(r, [&](A& a) {
1118 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1119 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1120 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1121 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1122 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1123 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1124 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1125 },{
1126 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1127 0xc4,0xe2,0x75,0x92,0x04,0x10,
1128 0xc4,0x62,0x75,0x92,0x14,0x10,
1129 0xc4,0xa2,0x75,0x92,0x04,0x20,
1130 0xc4,0xc2,0x75,0x92,0x04,0x11,
1131 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1132 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1133 });
1134
    // 64-bit loads from base+offset: offset 0 has no displacement, 1 and 42
    // take a 1-byte displacement, and 512 a 4-byte one.
Mike Kleinc322f632020-01-13 16:18:58 -06001135 test_asm(r, [&](A& a) {
1136 a.movq(A::rax, A::rdi, 0);
1137 a.movq(A::rax, A::rdi, 1);
1138 a.movq(A::rax, A::rdi, 512);
1139 a.movq(A::r15, A::r13, 42);
1140 a.movq(A::rax, A::r13, 42);
1141 a.movq(A::r15, A::rax, 42);
1142 },{
1143 0x48, 0x8b, 0x07,
1144 0x48, 0x8b, 0x47, 0x01,
1145 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1146 0x4d, 0x8b, 0x7d, 0x2a,
1147 0x49, 0x8b, 0x45, 0x2a,
1148 0x4c, 0x8b, 0x78, 0x2a,
1149 });
1150
Mike Klein9f4df802019-06-24 18:47:16 -04001151 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1152
1153 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001154 a.and16b(A::v4, A::v3, A::v1);
1155 a.orr16b(A::v4, A::v3, A::v1);
1156 a.eor16b(A::v4, A::v3, A::v1);
1157 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001158 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001159 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001160
1161 a.add4s(A::v4, A::v3, A::v1);
1162 a.sub4s(A::v4, A::v3, A::v1);
1163 a.mul4s(A::v4, A::v3, A::v1);
1164
Mike Klein97afd2e2019-10-16 14:11:27 -05001165 a.cmeq4s(A::v4, A::v3, A::v1);
1166 a.cmgt4s(A::v4, A::v3, A::v1);
1167
Mike Klein65809142019-06-25 09:44:02 -04001168 a.sub8h(A::v4, A::v3, A::v1);
1169 a.mul8h(A::v4, A::v3, A::v1);
1170
Mike Klein9f4df802019-06-24 18:47:16 -04001171 a.fadd4s(A::v4, A::v3, A::v1);
1172 a.fsub4s(A::v4, A::v3, A::v1);
1173 a.fmul4s(A::v4, A::v3, A::v1);
1174 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001175 a.fmin4s(A::v4, A::v3, A::v1);
1176 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001177
Mike Klein65809142019-06-25 09:44:02 -04001178 a.fmla4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001179
1180 a.fcmeq4s(A::v4, A::v3, A::v1);
1181 a.fcmgt4s(A::v4, A::v3, A::v1);
1182 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001183 },{
Mike Klein65809142019-06-25 09:44:02 -04001184 0x64,0x1c,0x21,0x4e,
1185 0x64,0x1c,0xa1,0x4e,
1186 0x64,0x1c,0x21,0x6e,
1187 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001188 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001189 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001190
1191 0x64,0x84,0xa1,0x4e,
1192 0x64,0x84,0xa1,0x6e,
1193 0x64,0x9c,0xa1,0x4e,
1194
Mike Klein97afd2e2019-10-16 14:11:27 -05001195 0x64,0x8c,0xa1,0x6e,
1196 0x64,0x34,0xa1,0x4e,
1197
Mike Klein65809142019-06-25 09:44:02 -04001198 0x64,0x84,0x61,0x6e,
1199 0x64,0x9c,0x61,0x4e,
1200
Mike Klein9f4df802019-06-24 18:47:16 -04001201 0x64,0xd4,0x21,0x4e,
1202 0x64,0xd4,0xa1,0x4e,
1203 0x64,0xdc,0x21,0x6e,
1204 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001205 0x64,0xf4,0xa1,0x4e,
1206 0x64,0xf4,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001207
Mike Klein65809142019-06-25 09:44:02 -04001208 0x64,0xcc,0x21,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001209
1210 0x64,0xe4,0x21,0x4e,
1211 0x64,0xe4,0xa1,0x6e,
1212 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001213 });
1214
    // Shifts by an immediate.  In the expected bytes the third byte carries
    // the shift amount: 32+shift for shl4s, 64-shift for sshr4s and ushr4s,
    // and 32-shift for ushr8h.
1215 test_asm(r, [&](A& a) {
1216 a.shl4s(A::v4, A::v3, 0);
1217 a.shl4s(A::v4, A::v3, 1);
1218 a.shl4s(A::v4, A::v3, 8);
1219 a.shl4s(A::v4, A::v3, 16);
1220 a.shl4s(A::v4, A::v3, 31);
1221
1222 a.sshr4s(A::v4, A::v3, 1);
1223 a.sshr4s(A::v4, A::v3, 8);
1224 a.sshr4s(A::v4, A::v3, 31);
1225
1226 a.ushr4s(A::v4, A::v3, 1);
1227 a.ushr4s(A::v4, A::v3, 8);
1228 a.ushr4s(A::v4, A::v3, 31);
1229
1230 a.ushr8h(A::v4, A::v3, 1);
1231 a.ushr8h(A::v4, A::v3, 8);
1232 a.ushr8h(A::v4, A::v3, 15);
1233 },{
1234 0x64,0x54,0x20,0x4f,
1235 0x64,0x54,0x21,0x4f,
1236 0x64,0x54,0x28,0x4f,
1237 0x64,0x54,0x30,0x4f,
1238 0x64,0x54,0x3f,0x4f,
1239
1240 0x64,0x04,0x3f,0x4f,
1241 0x64,0x04,0x38,0x4f,
1242 0x64,0x04,0x21,0x4f,
1243
1244 0x64,0x04,0x3f,0x6f,
1245 0x64,0x04,0x38,0x6f,
1246 0x64,0x04,0x21,0x6f,
1247
1248 0x64,0x04,0x1f,0x6f,
1249 0x64,0x04,0x18,0x6f,
1250 0x64,0x04,0x11,0x6f,
1251 });
1252
1253 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001254 a.sli4s(A::v4, A::v3, 0);
1255 a.sli4s(A::v4, A::v3, 1);
1256 a.sli4s(A::v4, A::v3, 8);
1257 a.sli4s(A::v4, A::v3, 16);
1258 a.sli4s(A::v4, A::v3, 31);
1259 },{
1260 0x64,0x54,0x20,0x6f,
1261 0x64,0x54,0x21,0x6f,
1262 0x64,0x54,0x28,0x6f,
1263 0x64,0x54,0x30,0x6f,
1264 0x64,0x54,0x3f,0x6f,
1265 });
1266
1267 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001268 a.scvtf4s (A::v4, A::v3);
1269 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001270 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001271 },{
1272 0x64,0xd8,0x21,0x4e,
1273 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001274 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001275 });
Mike Klein15a368d2019-06-26 10:21:12 -04001276
1277 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001278 a.brk(0);
1279 a.brk(65535);
1280
Mike Klein15a368d2019-06-26 10:21:12 -04001281 a.ret(A::x30); // Conventional ret using link register.
1282 a.ret(A::x13); // Can really return using any register if we like.
1283
1284 a.add(A::x2, A::x2, 4);
1285 a.add(A::x3, A::x2, 32);
1286
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001287 a.sub(A::x2, A::x2, 4);
1288 a.sub(A::x3, A::x2, 32);
1289
Mike Klein15a368d2019-06-26 10:21:12 -04001290 a.subs(A::x2, A::x2, 4);
1291 a.subs(A::x3, A::x2, 32);
1292
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001293 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1294 a.cmp(A::x2, 4);
1295
Mike Klein15a368d2019-06-26 10:21:12 -04001296 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001297 a.bne(&l);
1298 a.bne(&l);
1299 a.blt(&l);
1300 a.b(&l);
1301 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001302 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001303 },{
Mike Klein37be7712019-11-13 13:19:01 -06001304 0x00,0x00,0x20,0xd4,
1305 0xe0,0xff,0x3f,0xd4,
1306
Mike Klein15a368d2019-06-26 10:21:12 -04001307 0xc0,0x03,0x5f,0xd6,
1308 0xa0,0x01,0x5f,0xd6,
1309
1310 0x42,0x10,0x00,0x91,
1311 0x43,0x80,0x00,0x91,
1312
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001313 0x42,0x10,0x00,0xd1,
1314 0x43,0x80,0x00,0xd1,
1315
Mike Klein15a368d2019-06-26 10:21:12 -04001316 0x42,0x10,0x00,0xf1,
1317 0x43,0x80,0x00,0xf1,
1318
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001319 0x5f,0x10,0x00,0xf1,
1320 0x5f,0x10,0x00,0xf1,
1321
1322 0x01,0x00,0x00,0x54, // b.ne #0
1323 0xe1,0xff,0xff,0x54, // b.ne #-4
1324 0xcb,0xff,0xff,0x54, // b.lt #-8
1325 0xae,0xff,0xff,0x54, // b.al #-12
1326 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1327 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001328 });
Mike Kleine51632e2019-06-26 14:47:43 -04001329
Mike Kleince7b88c2019-07-11 14:06:40 -05001330 // Can we cbz() to a not-yet-defined label?
1331 test_asm(r, [&](A& a) {
1332 A::Label l;
1333 a.cbz(A::x2, &l);
1334 a.add(A::x3, A::x2, 32);
1335 a.label(&l);
1336 a.ret(A::x30);
1337 },{
1338 0x42,0x00,0x00,0xb4, // cbz x2, #8
1339 0x43,0x80,0x00,0x91, // add x3, x2, #32
1340 0xc0,0x03,0x5f,0xd6, // ret
1341 });
1342
1343 // If we start a label as a backward label,
1344 // can we redefine it to be a future label?
1345 // (Not sure this is useful... just want to test it works.)
1346 test_asm(r, [&](A& a) {
1347 A::Label l1 = a.here();
1348 a.add(A::x3, A::x2, 32);
1349 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1350
1351 A::Label l2 = a.here(); // Start off the same...
1352 a.add(A::x3, A::x2, 32);
1353 a.cbz(A::x2, &l2); // Looks like this will go backward...
1354 a.add(A::x2, A::x2, 4);
1355 a.add(A::x3, A::x2, 32);
1356 a.label(&l2); // But no... actually forward! What a switcheroo!
1357 },{
1358 0x43,0x80,0x00,0x91, // add x3, x2, #32
1359 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1360
1361 0x43,0x80,0x00,0x91, // add x3, x2, #32
1362 0x62,0x00,0x00,0xb4, // cbz x2, #12
1363 0x42,0x10,0x00,0x91, // add x2, x2, #4
1364 0x43,0x80,0x00,0x91, // add x3, x2, #32
1365 });
1366
Mike Klein81d52672019-07-30 11:11:09 -05001367 // Loading from a label on ARM.
1368 test_asm(r, [&](A& a) {
1369 A::Label fore,aft;
1370 a.label(&fore);
1371 a.word(0x01234567);
1372 a.ldrq(A::v1, &fore);
1373 a.ldrq(A::v2, &aft);
1374 a.label(&aft);
1375 a.word(0x76543210);
1376 },{
1377 0x67,0x45,0x23,0x01,
1378 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1379 0x22,0x00,0x00,0x9c, // ldr q2, #4
1380 0x10,0x32,0x54,0x76,
1381 });
1382
Mike Kleine51632e2019-06-26 14:47:43 -04001383 test_asm(r, [&](A& a) {
1384 a.ldrq(A::v0, A::x8);
1385 a.strq(A::v0, A::x8);
1386 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001387 0x00,0x01,0xc0,0x3d,
1388 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001389 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001390
1391 test_asm(r, [&](A& a) {
1392 a.xtns2h(A::v0, A::v0);
1393 a.xtnh2b(A::v0, A::v0);
1394 a.strs (A::v0, A::x0);
1395
1396 a.ldrs (A::v0, A::x0);
1397 a.uxtlb2h(A::v0, A::v0);
1398 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001399
1400 a.uminv4s(A::v3, A::v4);
1401 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001402 },{
1403 0x00,0x28,0x61,0x0e,
1404 0x00,0x28,0x21,0x0e,
1405 0x00,0x00,0x00,0xbd,
1406
1407 0x00,0x00,0x40,0xbd,
1408 0x00,0xa4,0x08,0x2f,
1409 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001410
1411 0x83,0xa8,0xb1,0x6e,
1412 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001413 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001414
1415 test_asm(r, [&](A& a) {
1416 a.ldrb(A::v0, A::x8);
1417 a.strb(A::v0, A::x8);
1418 },{
1419 0x00,0x01,0x40,0x3d,
1420 0x00,0x01,0x00,0x3d,
1421 });
Mike Klein81d52672019-07-30 11:11:09 -05001422
1423 test_asm(r, [&](A& a) {
1424 a.tbl(A::v0, A::v1, A::v2);
1425 },{
1426 0x20,0x00,0x02,0x4e,
1427 });
Mike Klein05642042019-06-18 12:16:06 -05001428}