blob: ad01f473e0b799787ed69dc1ff30f6d8a5f5d055 [file] [log] [blame]
Mike Klein68c50d02019-05-29 12:57:54 -05001/*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
10#include "src/core/SkVM.h"
11#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050012#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050013#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050014
Mike Klein7b7077c2019-06-03 17:10:59 -050015using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050016const char* fmt_name(Fmt fmt) {
17 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050018 case Fmt::A8: return "A8";
19 case Fmt::G8: return "G8";
20 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050021 }
22 return "";
23}
24
Mike Klein7e650762019-07-02 15:21:11 -050025namespace {
26 using namespace skvm;
27
28 struct V { Val id; };
29 struct R { Reg id; };
30 struct Shift { int bits; };
31 struct Splat { int bits; };
32 struct Hex { int bits; };
33
34 static void write(SkWStream* o, const char* s) {
35 o->writeText(s);
36 }
37
38 static void write(SkWStream* o, Arg a) {
39 write(o, "arg(");
40 o->writeDecAsText(a.ix);
41 write(o, ")");
42 }
43 static void write(SkWStream* o, V v) {
44 write(o, "v");
45 o->writeDecAsText(v.id);
46 }
47 static void write(SkWStream* o, R r) {
48 write(o, "r");
49 o->writeDecAsText(r.id);
50 }
51 static void write(SkWStream* o, Shift s) {
52 o->writeDecAsText(s.bits);
53 }
54 static void write(SkWStream* o, Splat s) {
55 float f;
56 memcpy(&f, &s.bits, 4);
57 o->writeHexAsText(s.bits);
58 write(o, " (");
59 o->writeScalarAsText(f);
60 write(o, ")");
61 }
62 static void write(SkWStream* o, Hex h) {
63 o->writeHexAsText(h.bits);
64 }
65
66 template <typename T, typename... Ts>
67 static void write(SkWStream* o, T first, Ts... rest) {
68 write(o, first);
69 write(o, " ");
70 write(o, rest...);
71 }
72
73 static void dump(const Builder& builder, SkWStream* o) {
74 const std::vector<Builder::Instruction> program = builder.program();
75
76 o->writeDecAsText(program.size());
77 o->writeText(" values:\n");
78 for (Val id = 0; id < (Val)program.size(); id++) {
79 const Builder::Instruction& inst = program[id];
80 Op op = inst.op;
81 Val x = inst.x,
82 y = inst.y,
83 z = inst.z;
84 int imm = inst.imm;
85 switch (op) {
86 case Op::store8: write(o, "store8" , Arg{imm}, V{x}); break;
87 case Op::store32: write(o, "store32", Arg{imm}, V{x}); break;
88
89 case Op::load8: write(o, V{id}, "= load8" , Arg{imm}); break;
90 case Op::load32: write(o, V{id}, "= load32", Arg{imm}); break;
91
92 case Op::splat: write(o, V{id}, "= splat", Splat{imm}); break;
93
94 case Op::add_f32: write(o, V{id}, "= add_f32", V{x}, V{y} ); break;
95 case Op::sub_f32: write(o, V{id}, "= sub_f32", V{x}, V{y} ); break;
96 case Op::mul_f32: write(o, V{id}, "= mul_f32", V{x}, V{y} ); break;
97 case Op::div_f32: write(o, V{id}, "= div_f32", V{x}, V{y} ); break;
98 case Op::mad_f32: write(o, V{id}, "= mad_f32", V{x}, V{y}, V{z}); break;
99
100 case Op::add_i32: write(o, V{id}, "= add_i32", V{x}, V{y}); break;
101 case Op::sub_i32: write(o, V{id}, "= sub_i32", V{x}, V{y}); break;
102 case Op::mul_i32: write(o, V{id}, "= mul_i32", V{x}, V{y}); break;
103
104 case Op::sub_i16x2: write(o, V{id}, "= sub_i16x2", V{x}, V{y}); break;
105 case Op::mul_i16x2: write(o, V{id}, "= mul_i16x2", V{x}, V{y}); break;
106 case Op::shr_i16x2: write(o, V{id}, "= shr_i16x2", V{x}, Shift{imm}); break;
107
108 case Op::bit_and : write(o, V{id}, "= bit_and" , V{x}, V{y}); break;
109 case Op::bit_or : write(o, V{id}, "= bit_or" , V{x}, V{y}); break;
110 case Op::bit_xor : write(o, V{id}, "= bit_xor" , V{x}, V{y}); break;
111 case Op::bit_clear: write(o, V{id}, "= bit_clear", V{x}, V{y}); break;
112
113 case Op::shl: write(o, V{id}, "= shl", V{x}, Shift{imm}); break;
114 case Op::shr: write(o, V{id}, "= shr", V{x}, Shift{imm}); break;
115 case Op::sra: write(o, V{id}, "= sra", V{x}, Shift{imm}); break;
116
117 case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{imm}, V{y}); break;
118 case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{imm}); break;
119
120 case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{imm}); break;
121
122 case Op::to_f32: write(o, V{id}, "= to_f32", V{x}); break;
123 case Op::to_i32: write(o, V{id}, "= to_i32", V{x}); break;
124 }
125
126 write(o, "\n");
127 }
128 }
129
130 static void dump(const Program& program, SkWStream* o) {
131 const std::vector<Program::Instruction> instructions = program.instructions();
132 const int nregs = program.nregs();
133 const int loop = program.loop();
134
135 o->writeDecAsText(nregs);
136 o->writeText(" registers, ");
137 o->writeDecAsText(instructions.size());
138 o->writeText(" instructions:\n");
139 for (int i = 0; i < (int)instructions.size(); i++) {
140 if (i == loop) {
141 write(o, "loop:\n");
142 }
143 const Program::Instruction& inst = instructions[i];
144 Op op = inst.op;
145 Reg d = inst.d,
146 x = inst.x,
147 y = inst.y,
148 z = inst.z;
149 int imm = inst.imm;
150 switch (op) {
151 case Op::store8: write(o, "store8" , Arg{imm}, R{x}); break;
152 case Op::store32: write(o, "store32", Arg{imm}, R{x}); break;
153
154 case Op::load8: write(o, R{d}, "= load8" , Arg{imm}); break;
155 case Op::load32: write(o, R{d}, "= load32", Arg{imm}); break;
156
157 case Op::splat: write(o, R{d}, "= splat", Splat{imm}); break;
158
159 case Op::add_f32: write(o, R{d}, "= add_f32", R{x}, R{y} ); break;
160 case Op::sub_f32: write(o, R{d}, "= sub_f32", R{x}, R{y} ); break;
161 case Op::mul_f32: write(o, R{d}, "= mul_f32", R{x}, R{y} ); break;
162 case Op::div_f32: write(o, R{d}, "= div_f32", R{x}, R{y} ); break;
163 case Op::mad_f32: write(o, R{d}, "= mad_f32", R{x}, R{y}, R{z}); break;
164
165 case Op::add_i32: write(o, R{d}, "= add_i32", R{x}, R{y}); break;
166 case Op::sub_i32: write(o, R{d}, "= sub_i32", R{x}, R{y}); break;
167 case Op::mul_i32: write(o, R{d}, "= mul_i32", R{x}, R{y}); break;
168
169 case Op::sub_i16x2: write(o, R{d}, "= sub_i16x2", R{x}, R{y}); break;
170 case Op::mul_i16x2: write(o, R{d}, "= mul_i16x2", R{x}, R{y}); break;
171 case Op::shr_i16x2: write(o, R{d}, "= shr_i16x2", R{x}, Shift{imm}); break;
172
173 case Op::bit_and : write(o, R{d}, "= bit_and" , R{x}, R{y}); break;
174 case Op::bit_or : write(o, R{d}, "= bit_or" , R{x}, R{y}); break;
175 case Op::bit_xor : write(o, R{d}, "= bit_xor" , R{x}, R{y}); break;
176 case Op::bit_clear: write(o, R{d}, "= bit_clear", R{x}, R{y}); break;
177
178 case Op::shl: write(o, R{d}, "= shl", R{x}, Shift{imm}); break;
179 case Op::shr: write(o, R{d}, "= shr", R{x}, Shift{imm}); break;
180 case Op::sra: write(o, R{d}, "= sra", R{x}, Shift{imm}); break;
181
182 case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{imm}, R{y}); break;
183 case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{imm}); break;
184
185 case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{imm}); break;
186
187 case Op::to_f32: write(o, R{d}, "= to_f32", R{x}); break;
188 case Op::to_i32: write(o, R{d}, "= to_i32", R{x}); break;
189 }
190 write(o, "\n");
191 }
192 }
193
Mike Kleinaab45b52019-07-02 15:39:23 -0500194 static void dump(const Builder& builder, const Program& program, SkWStream* o) {
195 dump(builder, o);
196 o->writeText("\n");
197 dump(program, o);
198 o->writeText("\n");
199 }
200
Mike Klein7e650762019-07-02 15:21:11 -0500201} // namespace
202
Mike Klein9977efa2019-07-15 12:22:36 -0500203template <typename Fn>
204static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
205 test((const skvm::Program&) program);
206 program.dropJIT();
207 test((const skvm::Program&) program);
208}
Mike Klein7e650762019-07-02 15:21:11 -0500209
Mike Klein68c50d02019-05-29 12:57:54 -0500210DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -0500211 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -0500212
213 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -0500214 for (int s = 0; s < 3; s++)
215 for (int d = 0; d < 3; d++) {
216 auto srcFmt = (Fmt)s,
217 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -0500218 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
219 skvm::Program program = builder.done();
Mike Klein68c50d02019-05-29 12:57:54 -0500220
Mike Klein267f5072019-06-03 16:27:46 -0500221 buf.writeText(fmt_name(srcFmt));
222 buf.writeText(" over ");
223 buf.writeText(fmt_name(dstFmt));
224 buf.writeText("\n");
Mike Kleinaab45b52019-07-02 15:39:23 -0500225 dump(builder, program, &buf);
Mike Klein267f5072019-06-03 16:27:46 -0500226 }
Mike Klein68c50d02019-05-29 12:57:54 -0500227
Mike Klein7b7077c2019-06-03 17:10:59 -0500228 // Write the I32 Srcovers also.
229 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500230 SrcoverBuilder_I32_Naive builder;
231 skvm::Program program = builder.done();
Mike Klein397fc882019-06-20 11:37:10 -0500232 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Kleinaab45b52019-07-02 15:39:23 -0500233 dump(builder, program, &buf);
Mike Klein397fc882019-06-20 11:37:10 -0500234 }
235 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500236 SrcoverBuilder_I32 builder;
237 skvm::Program program = builder.done();
Mike Klein7b7077c2019-06-03 17:10:59 -0500238 buf.writeText("I32 8888 over 8888\n");
Mike Kleinaab45b52019-07-02 15:39:23 -0500239 dump(builder, program, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -0500240 }
241 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500242 SrcoverBuilder_I32_SWAR builder;
243 skvm::Program program = builder.done();
Mike Klein7b7077c2019-06-03 17:10:59 -0500244 buf.writeText("I32 (SWAR) 8888 over 8888\n");
Mike Kleinaab45b52019-07-02 15:39:23 -0500245 dump(builder, program, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -0500246 }
247
Mike Klein267f5072019-06-03 16:27:46 -0500248 sk_sp<SkData> blob = buf.detachAsData();
249 {
250
251 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Mike Klein77163312019-06-04 13:35:32 -0500252 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
253 if (expected) {
254 if (blob->size() != expected->size()
255 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500256
Mike Klein77163312019-06-04 13:35:32 -0500257 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
258 expected->size(), expected->data(),
259 blob->size(), blob->data());
260 }
261
262 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
263 if (out.isValid()) {
264 out.write(blob->data(), blob->size());
265 }
Mike Klein68c50d02019-05-29 12:57:54 -0500266 }
267 }
268
Mike Klein9977efa2019-07-15 12:22:36 -0500269 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500270 uint32_t src[9];
271 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500272
Mike Klein9977efa2019-07-15 12:22:36 -0500273 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
274 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
275 src[i] = 0xbb007733;
276 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500277 }
Mike Klein9977efa2019-07-15 12:22:36 -0500278
279 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
280
281 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
282
283 // dst is probably 0xff2dad72.
284 for (auto got : dst) {
285 auto want = expected;
286 for (int i = 0; i < 4; i++) {
287 uint8_t d = got & 0xff,
288 w = want & 0xff;
289 REPORTER_ASSERT(r, abs(d-w) < 2);
290 got >>= 8;
291 want >>= 8;
292 }
293 }
294 });
Mike Klein3f593792019-06-12 12:54:52 -0500295 };
Mike Klein68c50d02019-05-29 12:57:54 -0500296
Mike Klein3f593792019-06-12 12:54:52 -0500297 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done());
Mike Klein397fc882019-06-20 11:37:10 -0500298 test_8888(SrcoverBuilder_I32_Naive{}.done());
Mike Klein56403972019-06-12 14:01:10 -0500299 test_8888(SrcoverBuilder_I32{}.done());
Mike Klein3f593792019-06-12 12:54:52 -0500300 test_8888(SrcoverBuilder_I32_SWAR{}.done());
Mike Klein7b7077c2019-06-03 17:10:59 -0500301
Mike Klein9977efa2019-07-15 12:22:36 -0500302 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
303 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500304 uint32_t src[9];
305 uint8_t dst[SK_ARRAY_COUNT(src)];
306
307 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
308 src[i] = 0xbb007733;
309 dst[i] = 0x42;
310 }
311
312 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
313 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500314
315 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
316 SkGetPackedG32(over),
317 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500318 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500319
Mike Klein3f593792019-06-12 12:54:52 -0500320 for (auto got : dst) {
321 REPORTER_ASSERT(r, abs(got-want) < 3);
322 }
Mike Klein9977efa2019-07-15 12:22:36 -0500323 });
Mike Klein68c50d02019-05-29 12:57:54 -0500324
Mike Klein9977efa2019-07-15 12:22:36 -0500325 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
326 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500327 uint8_t src[256],
328 dst[256];
329 for (int i = 0; i < 256; i++) {
330 src[i] = 255 - i;
331 dst[i] = i;
332 }
333
334 program.eval(256, src, dst);
335
336 for (int i = 0; i < 256; i++) {
337 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
338 SkPackARGB32( i, 0,0,0)));
339 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
340 }
Mike Klein9977efa2019-07-15 12:22:36 -0500341 });
Mike Klein68c50d02019-05-29 12:57:54 -0500342}
Mike Klein81756e42019-06-12 11:36:28 -0500343
344DEF_TEST(SkVM_LoopCounts, r) {
345 // Make sure we cover all the exact N we want.
346
Mike Klein9977efa2019-07-15 12:22:36 -0500347 // buf[i] += 1
348 skvm::Builder b;
349 skvm::Arg arg = b.arg<int>();
350 b.store32(arg,
351 b.add(b.splat(1),
352 b.load32(arg)));
353
Mike Klein81756e42019-06-12 11:36:28 -0500354 int buf[64];
355 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500356 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
357 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
358 buf[i] = i;
359 }
360 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500361
Mike Klein9977efa2019-07-15 12:22:36 -0500362 for (int i = 0; i < N; i++) {
363 REPORTER_ASSERT(r, buf[i] == i+1);
364 }
365 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
366 REPORTER_ASSERT(r, buf[i] == i);
367 }
368 });
Mike Klein81756e42019-06-12 11:36:28 -0500369 }
370}
Mike Klein05642042019-06-18 12:16:06 -0500371
372
Mike Klein05642042019-06-18 12:16:06 -0500373template <typename Fn>
374static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400375 uint8_t buf[4096];
376 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500377 fn(a);
378
379 REPORTER_ASSERT(r, a.size() == expected.size());
380
Mike Klein88c0a902019-06-24 15:34:02 -0400381 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500382 want = expected.begin();
383 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500384 REPORTER_ASSERT(r, got[i] == want[i],
385 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500386 }
387}
388
389DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500390 // Easiest way to generate test cases is
391 //
392 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
393 //
394 // The -x86-asm-syntax=intel bit is optional, controlling the
395 // input syntax only; the output will always be AT&T op x,y,dst style.
396 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
397 // that a bit easier to use here, despite maybe favoring AT&T overall.
398
399 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500400 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500401 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500402 a.vzeroupper();
403 a.ret();
404 },{
405 0xc5, 0xf8, 0x77,
406 0xc3,
407 });
408
409 // Align should pad with nop().
Mike Klein397fc882019-06-20 11:37:10 -0500410 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500411 a.ret();
412 a.align(4);
413 },{
414 0xc3,
415 0x90, 0x90, 0x90,
416 });
Mike Klein61703a62019-06-18 15:01:12 -0500417
Mike Klein397fc882019-06-20 11:37:10 -0500418 test_asm(r, [&](A& a) {
419 a.add(A::rax, 8); // Always good to test rax.
420 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -0500421
Mike Klein397fc882019-06-20 11:37:10 -0500422 a.add(A::rdi, 12); // Last 0x48 REX
423 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -0500424
Mike Klein86a645c2019-07-12 12:29:39 -0500425 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -0500426 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -0500427
Mike Klein397fc882019-06-20 11:37:10 -0500428 a.add(A::rsi, 128); // Requires 4 byte immediate.
429 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -0500430 },{
Mike Kleind3e75a72019-06-18 15:26:08 -0500431 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -0500432 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -0500433
434 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -0500435 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -0500436
Mike Klein86a645c2019-07-12 12:29:39 -0500437 0x49, 0x83, 0b11'000'000, 0x07,
438 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -0500439
440 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -0500441 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -0500442 });
Mike Klein397fc882019-06-20 11:37:10 -0500443
444
445 test_asm(r, [&](A& a) {
446 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
447 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
448 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
449 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
450 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
451 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
452 },{
453 /* VEX */ /*op*/ /*modRM*/
454 0xc5, 0xf5, 0xfe, 0xc2,
455 0xc5, 0x75, 0xfe, 0xc2,
456 0xc5, 0xbd, 0xfe, 0xc2,
457 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
458 0xc4, 0xe2, 0x75, 0x40, 0xc2,
459 0xc5, 0xf5, 0xfa, 0xc2,
460 });
Mike Kleinff0ae812019-06-20 15:03:44 -0500461
462 test_asm(r, [&](A& a) {
463 a.vpsrld(A::ymm15, A::ymm2, 8);
464 a.vpsrld(A::ymm0 , A::ymm8, 5);
465 },{
466 0xc5, 0x85, 0x72,0xd2, 0x08,
467 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
468 });
469
470 test_asm(r, [&](A& a) {
471 a.vpermq(A::ymm1, A::ymm2, 5);
472 },{
473 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
474 });
Mike Kleine5053412019-06-21 12:37:22 -0500475
476 test_asm(r, [&](A& a) {
477 A::Label l = a.here();
478 a.byte(1);
479 a.byte(2);
480 a.byte(3);
481 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -0500482
Mike Klein65c10b52019-07-12 09:22:21 -0500483 a.vbroadcastss(A::ymm0 , &l);
484 a.vbroadcastss(A::ymm1 , &l);
485 a.vbroadcastss(A::ymm8 , &l);
486 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -0500487
Mike Klein65c10b52019-07-12 09:22:21 -0500488 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -0500489 },{
490 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -0500491
Mike Kleine5053412019-06-21 12:37:22 -0500492 /* VEX */ /*op*/ /* ModRM */ /* offset */
493 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
494 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
495 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
496 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -0500497
498 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Kleine5053412019-06-21 12:37:22 -0500499 });
Mike Klein060eaaa2019-06-21 14:42:09 -0500500
501 test_asm(r, [&](A& a) {
502 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500503 a.jne(&l);
504 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -0500505 a.je (&l);
506 a.jmp(&l);
507 a.jl (&l);
508
509 a.cmp(A::rdx, 0);
510 a.cmp(A::rax, 12);
511 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -0500512 },{
Mike Klein35b97c32019-07-12 12:32:45 -0500513 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
514 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
515 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
516 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
517 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
518
519 0x48,0x83,0xfa,0x00,
520 0x48,0x83,0xf8,0x0c,
521 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -0500522 });
Mike Klein120d9e82019-06-21 15:52:55 -0500523
524 test_asm(r, [&](A& a) {
525 a.vmovups(A::ymm5, A::rsi);
526 a.vmovups(A::rsi, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -0500527
528 a.vpmovzxbd(A::ymm4, A::rsi);
Mike Kleinf3881b22019-06-21 16:20:24 -0500529
530 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -0500531 },{
Mike Kleinae51aa32019-06-21 16:06:03 -0500532 /* VEX */ /*Op*/ /* ModRM */
533 0xc5, 0xfc, 0x10, 0b00'101'110,
534 0xc5, 0xfc, 0x11, 0b00'101'110,
535
536 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -0500537
538 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -0500539 });
Mike Klein2b7b2a22019-06-23 20:35:28 -0400540
541 test_asm(r, [&](A& a) {
Mike Klein35b97c32019-07-12 12:32:45 -0500542 a.movzbl(A::rax, A::rsi); // Low registers for src and dst.
543 a.movzbl(A::rax, A::r8); // High src register.
544 a.movzbl(A::r8 , A::rsi); // High dst register.
545
546 a.vmovd(A::rax, A::xmm0);
547 a.vmovd(A::rax, A::xmm8);
548 a.vmovd(A::r8, A::xmm0);
549
550 a.vmovd(A::xmm0, A::rax);
551 a.vmovd(A::xmm8, A::rax);
552 a.vmovd(A::xmm0, A::r8);
553
554 a.vmovd_direct(A::rax, A::xmm0);
555 a.vmovd_direct(A::rax, A::xmm8);
556 a.vmovd_direct(A::r8, A::xmm0);
557
558 a.vmovd_direct(A::xmm0, A::rax);
559 a.vmovd_direct(A::xmm8, A::rax);
560 a.vmovd_direct(A::xmm0, A::r8);
561
562 a.movb(A::rdx, A::rax);
563 a.movb(A::rdx, A::r8);
564 a.movb(A::r8 , A::rax);
565 },{
566 0x0f,0xb6,0x06,
567 0x41,0x0f,0xb6,0x00,
568 0x44,0x0f,0xb6,0x06,
569
570 0xc5,0xf9,0x7e,0x00,
571 0xc5,0x79,0x7e,0x00,
572 0xc4,0xc1,0x79,0x7e,0x00,
573
574 0xc5,0xf9,0x6e,0x00,
575 0xc5,0x79,0x6e,0x00,
576 0xc4,0xc1,0x79,0x6e,0x00,
577
578 0xc5,0xf9,0x7e,0xc0,
579 0xc5,0x79,0x7e,0xc0,
580 0xc4,0xc1,0x79,0x7e,0xc0,
581
582 0xc5,0xf9,0x6e,0xc0,
583 0xc5,0x79,0x6e,0xc0,
584 0xc4,0xc1,0x79,0x6e,0xc0,
585
586 0x88, 0x02,
587 0x44, 0x88, 0x02,
588 0x41, 0x88, 0x00,
589 });
590
591 test_asm(r, [&](A& a) {
592 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
593 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
594
595 a.vpextrb(A::rsi, A::xmm8, 7);
596 a.vpextrb(A::r8, A::xmm1, 15);
597 },{
598 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
599 0xc4,0x43,0x71, 0x20, 0x00, 12,
600
601 0xc4,0x63,0x79, 0x14, 0x06, 7,
602 0xc4,0xc3,0x79, 0x14, 0x08, 15,
603 });
604
605 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -0400606 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
607 },{
608 0xc5, 0x9d, 0xdf, 0xda,
609 });
Mike Klein9f4df802019-06-24 18:47:16 -0400610
611 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
612
613 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -0400614 a.and16b(A::v4, A::v3, A::v1);
615 a.orr16b(A::v4, A::v3, A::v1);
616 a.eor16b(A::v4, A::v3, A::v1);
617 a.bic16b(A::v4, A::v3, A::v1);
618
619 a.add4s(A::v4, A::v3, A::v1);
620 a.sub4s(A::v4, A::v3, A::v1);
621 a.mul4s(A::v4, A::v3, A::v1);
622
623 a.sub8h(A::v4, A::v3, A::v1);
624 a.mul8h(A::v4, A::v3, A::v1);
625
Mike Klein9f4df802019-06-24 18:47:16 -0400626 a.fadd4s(A::v4, A::v3, A::v1);
627 a.fsub4s(A::v4, A::v3, A::v1);
628 a.fmul4s(A::v4, A::v3, A::v1);
629 a.fdiv4s(A::v4, A::v3, A::v1);
630
Mike Klein65809142019-06-25 09:44:02 -0400631 a.fmla4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -0400632 },{
Mike Klein65809142019-06-25 09:44:02 -0400633 0x64,0x1c,0x21,0x4e,
634 0x64,0x1c,0xa1,0x4e,
635 0x64,0x1c,0x21,0x6e,
636 0x64,0x1c,0x61,0x4e,
637
638 0x64,0x84,0xa1,0x4e,
639 0x64,0x84,0xa1,0x6e,
640 0x64,0x9c,0xa1,0x4e,
641
642 0x64,0x84,0x61,0x6e,
643 0x64,0x9c,0x61,0x4e,
644
Mike Klein9f4df802019-06-24 18:47:16 -0400645 0x64,0xd4,0x21,0x4e,
646 0x64,0xd4,0xa1,0x4e,
647 0x64,0xdc,0x21,0x6e,
648 0x64,0xfc,0x21,0x6e,
649
Mike Klein65809142019-06-25 09:44:02 -0400650 0x64,0xcc,0x21,0x4e,
651 });
652
653 test_asm(r, [&](A& a) {
654 a.shl4s(A::v4, A::v3, 0);
655 a.shl4s(A::v4, A::v3, 1);
656 a.shl4s(A::v4, A::v3, 8);
657 a.shl4s(A::v4, A::v3, 16);
658 a.shl4s(A::v4, A::v3, 31);
659
660 a.sshr4s(A::v4, A::v3, 1);
661 a.sshr4s(A::v4, A::v3, 8);
662 a.sshr4s(A::v4, A::v3, 31);
663
664 a.ushr4s(A::v4, A::v3, 1);
665 a.ushr4s(A::v4, A::v3, 8);
666 a.ushr4s(A::v4, A::v3, 31);
667
668 a.ushr8h(A::v4, A::v3, 1);
669 a.ushr8h(A::v4, A::v3, 8);
670 a.ushr8h(A::v4, A::v3, 15);
671 },{
672 0x64,0x54,0x20,0x4f,
673 0x64,0x54,0x21,0x4f,
674 0x64,0x54,0x28,0x4f,
675 0x64,0x54,0x30,0x4f,
676 0x64,0x54,0x3f,0x4f,
677
678 0x64,0x04,0x3f,0x4f,
679 0x64,0x04,0x38,0x4f,
680 0x64,0x04,0x21,0x4f,
681
682 0x64,0x04,0x3f,0x6f,
683 0x64,0x04,0x38,0x6f,
684 0x64,0x04,0x21,0x6f,
685
686 0x64,0x04,0x1f,0x6f,
687 0x64,0x04,0x18,0x6f,
688 0x64,0x04,0x11,0x6f,
689 });
690
691 test_asm(r, [&](A& a) {
692 a.scvtf4s (A::v4, A::v3);
693 a.fcvtzs4s(A::v4, A::v3);
694 },{
695 0x64,0xd8,0x21,0x4e,
696 0x64,0xb8,0xa1,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -0400697 });
Mike Klein15a368d2019-06-26 10:21:12 -0400698
699 test_asm(r, [&](A& a) {
700 a.ret(A::x30); // Conventional ret using link register.
701 a.ret(A::x13); // Can really return using any register if we like.
702
703 a.add(A::x2, A::x2, 4);
704 a.add(A::x3, A::x2, 32);
705
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500706 a.sub(A::x2, A::x2, 4);
707 a.sub(A::x3, A::x2, 32);
708
Mike Klein15a368d2019-06-26 10:21:12 -0400709 a.subs(A::x2, A::x2, 4);
710 a.subs(A::x3, A::x2, 32);
711
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500712 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
713 a.cmp(A::x2, 4);
714
Mike Klein15a368d2019-06-26 10:21:12 -0400715 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500716 a.bne(&l);
717 a.bne(&l);
718 a.blt(&l);
719 a.b(&l);
720 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -0500721 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -0400722 },{
723 0xc0,0x03,0x5f,0xd6,
724 0xa0,0x01,0x5f,0xd6,
725
726 0x42,0x10,0x00,0x91,
727 0x43,0x80,0x00,0x91,
728
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500729 0x42,0x10,0x00,0xd1,
730 0x43,0x80,0x00,0xd1,
731
Mike Klein15a368d2019-06-26 10:21:12 -0400732 0x42,0x10,0x00,0xf1,
733 0x43,0x80,0x00,0xf1,
734
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500735 0x5f,0x10,0x00,0xf1,
736 0x5f,0x10,0x00,0xf1,
737
738 0x01,0x00,0x00,0x54, // b.ne #0
739 0xe1,0xff,0xff,0x54, // b.ne #-4
740 0xcb,0xff,0xff,0x54, // b.lt #-8
741 0xae,0xff,0xff,0x54, // b.al #-12
742 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
743 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -0400744 });
Mike Kleine51632e2019-06-26 14:47:43 -0400745
Mike Kleince7b88c2019-07-11 14:06:40 -0500746 // Can we cbz() to a not-yet-defined label?
747 test_asm(r, [&](A& a) {
748 A::Label l;
749 a.cbz(A::x2, &l);
750 a.add(A::x3, A::x2, 32);
751 a.label(&l);
752 a.ret(A::x30);
753 },{
754 0x42,0x00,0x00,0xb4, // cbz x2, #8
755 0x43,0x80,0x00,0x91, // add x3, x2, #32
756 0xc0,0x03,0x5f,0xd6, // ret
757 });
758
759 // If we start a label as a backward label,
760 // can we redefine it to be a future label?
761 // (Not sure this is useful... just want to test it works.)
762 test_asm(r, [&](A& a) {
763 A::Label l1 = a.here();
764 a.add(A::x3, A::x2, 32);
765 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
766
767 A::Label l2 = a.here(); // Start off the same...
768 a.add(A::x3, A::x2, 32);
769 a.cbz(A::x2, &l2); // Looks like this will go backward...
770 a.add(A::x2, A::x2, 4);
771 a.add(A::x3, A::x2, 32);
772 a.label(&l2); // But no... actually forward! What a switcheroo!
773 },{
774 0x43,0x80,0x00,0x91, // add x3, x2, #32
775 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
776
777 0x43,0x80,0x00,0x91, // add x3, x2, #32
778 0x62,0x00,0x00,0xb4, // cbz x2, #12
779 0x42,0x10,0x00,0x91, // add x2, x2, #4
780 0x43,0x80,0x00,0x91, // add x3, x2, #32
781 });
782
Mike Kleine51632e2019-06-26 14:47:43 -0400783 test_asm(r, [&](A& a) {
784 a.ldrq(A::v0, A::x8);
785 a.strq(A::v0, A::x8);
786 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500787 0x00,0x01,0xc0,0x3d,
788 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -0400789 });
Mike Klein1fa149a2019-07-01 11:18:08 -0500790
791 test_asm(r, [&](A& a) {
792 a.xtns2h(A::v0, A::v0);
793 a.xtnh2b(A::v0, A::v0);
794 a.strs (A::v0, A::x0);
795
796 a.ldrs (A::v0, A::x0);
797 a.uxtlb2h(A::v0, A::v0);
798 a.uxtlh2s(A::v0, A::v0);
799 },{
800 0x00,0x28,0x61,0x0e,
801 0x00,0x28,0x21,0x0e,
802 0x00,0x00,0x00,0xbd,
803
804 0x00,0x00,0x40,0xbd,
805 0x00,0xa4,0x08,0x2f,
806 0x00,0xa4,0x10,0x2f,
807 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500808
809 test_asm(r, [&](A& a) {
810 a.ldrb(A::v0, A::x8);
811 a.strb(A::v0, A::x8);
812 },{
813 0x00,0x01,0x40,0x3d,
814 0x00,0x01,0x00,0x3d,
815 });
Mike Klein05642042019-06-18 12:16:06 -0500816}