blob: d9593b38af975ea4bd9b245d1f919de35b939a8e [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
10#include "src/core/SkVM.h"
11#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050012#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050013#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050014
Mike Klein7b7077c2019-06-03 17:10:59 -050015using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050016const char* fmt_name(Fmt fmt) {
17 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050018 case Fmt::A8: return "A8";
19 case Fmt::G8: return "G8";
20 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050021 }
22 return "";
23}
24
Mike Klein7e650762019-07-02 15:21:11 -050025namespace {
26 using namespace skvm;
27
28 struct V { Val id; };
29 struct R { Reg id; };
30 struct Shift { int bits; };
31 struct Splat { int bits; };
32 struct Hex { int bits; };
33
34 static void write(SkWStream* o, const char* s) {
35 o->writeText(s);
36 }
37
38 static void write(SkWStream* o, Arg a) {
39 write(o, "arg(");
40 o->writeDecAsText(a.ix);
41 write(o, ")");
42 }
43 static void write(SkWStream* o, V v) {
44 write(o, "v");
45 o->writeDecAsText(v.id);
46 }
47 static void write(SkWStream* o, R r) {
48 write(o, "r");
49 o->writeDecAsText(r.id);
50 }
51 static void write(SkWStream* o, Shift s) {
52 o->writeDecAsText(s.bits);
53 }
54 static void write(SkWStream* o, Splat s) {
55 float f;
56 memcpy(&f, &s.bits, 4);
57 o->writeHexAsText(s.bits);
58 write(o, " (");
59 o->writeScalarAsText(f);
60 write(o, ")");
61 }
62 static void write(SkWStream* o, Hex h) {
63 o->writeHexAsText(h.bits);
64 }
65
66 template <typename T, typename... Ts>
67 static void write(SkWStream* o, T first, Ts... rest) {
68 write(o, first);
69 write(o, " ");
70 write(o, rest...);
71 }
72
Mike Klein62bccda2019-07-18 10:36:45 -050073 static void dump_builder(const Builder& builder, SkWStream* o) {
Mike Klein7e650762019-07-02 15:21:11 -050074 const std::vector<Builder::Instruction> program = builder.program();
75
76 o->writeDecAsText(program.size());
77 o->writeText(" values:\n");
78 for (Val id = 0; id < (Val)program.size(); id++) {
79 const Builder::Instruction& inst = program[id];
80 Op op = inst.op;
81 Val x = inst.x,
82 y = inst.y,
83 z = inst.z;
84 int imm = inst.imm;
Mike Klein5e533c92019-07-22 13:44:54 -050085 write(o, inst.death == 0 ? "☠️ " :
86 inst.hoist ? "↑ " : " ");
Mike Klein7e650762019-07-02 15:21:11 -050087 switch (op) {
88 case Op::store8: write(o, "store8" , Arg{imm}, V{x}); break;
89 case Op::store32: write(o, "store32", Arg{imm}, V{x}); break;
90
91 case Op::load8: write(o, V{id}, "= load8" , Arg{imm}); break;
92 case Op::load32: write(o, V{id}, "= load32", Arg{imm}); break;
93
94 case Op::splat: write(o, V{id}, "= splat", Splat{imm}); break;
95
96 case Op::add_f32: write(o, V{id}, "= add_f32", V{x}, V{y} ); break;
97 case Op::sub_f32: write(o, V{id}, "= sub_f32", V{x}, V{y} ); break;
98 case Op::mul_f32: write(o, V{id}, "= mul_f32", V{x}, V{y} ); break;
99 case Op::div_f32: write(o, V{id}, "= div_f32", V{x}, V{y} ); break;
100 case Op::mad_f32: write(o, V{id}, "= mad_f32", V{x}, V{y}, V{z}); break;
101
102 case Op::add_i32: write(o, V{id}, "= add_i32", V{x}, V{y}); break;
103 case Op::sub_i32: write(o, V{id}, "= sub_i32", V{x}, V{y}); break;
104 case Op::mul_i32: write(o, V{id}, "= mul_i32", V{x}, V{y}); break;
105
106 case Op::sub_i16x2: write(o, V{id}, "= sub_i16x2", V{x}, V{y}); break;
107 case Op::mul_i16x2: write(o, V{id}, "= mul_i16x2", V{x}, V{y}); break;
108 case Op::shr_i16x2: write(o, V{id}, "= shr_i16x2", V{x}, Shift{imm}); break;
109
110 case Op::bit_and : write(o, V{id}, "= bit_and" , V{x}, V{y}); break;
111 case Op::bit_or : write(o, V{id}, "= bit_or" , V{x}, V{y}); break;
112 case Op::bit_xor : write(o, V{id}, "= bit_xor" , V{x}, V{y}); break;
113 case Op::bit_clear: write(o, V{id}, "= bit_clear", V{x}, V{y}); break;
114
115 case Op::shl: write(o, V{id}, "= shl", V{x}, Shift{imm}); break;
116 case Op::shr: write(o, V{id}, "= shr", V{x}, Shift{imm}); break;
117 case Op::sra: write(o, V{id}, "= sra", V{x}, Shift{imm}); break;
118
119 case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{imm}, V{y}); break;
120 case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{imm}); break;
121
122 case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{imm}); break;
123
124 case Op::to_f32: write(o, V{id}, "= to_f32", V{x}); break;
125 case Op::to_i32: write(o, V{id}, "= to_i32", V{x}); break;
126 }
127
128 write(o, "\n");
129 }
130 }
131
Mike Klein62bccda2019-07-18 10:36:45 -0500132 static void dump_program(const Program& program, SkWStream* o) {
Mike Klein7e650762019-07-02 15:21:11 -0500133 const std::vector<Program::Instruction> instructions = program.instructions();
134 const int nregs = program.nregs();
135 const int loop = program.loop();
136
137 o->writeDecAsText(nregs);
138 o->writeText(" registers, ");
139 o->writeDecAsText(instructions.size());
140 o->writeText(" instructions:\n");
141 for (int i = 0; i < (int)instructions.size(); i++) {
142 if (i == loop) {
143 write(o, "loop:\n");
144 }
145 const Program::Instruction& inst = instructions[i];
146 Op op = inst.op;
147 Reg d = inst.d,
148 x = inst.x,
149 y = inst.y,
150 z = inst.z;
151 int imm = inst.imm;
152 switch (op) {
153 case Op::store8: write(o, "store8" , Arg{imm}, R{x}); break;
154 case Op::store32: write(o, "store32", Arg{imm}, R{x}); break;
155
156 case Op::load8: write(o, R{d}, "= load8" , Arg{imm}); break;
157 case Op::load32: write(o, R{d}, "= load32", Arg{imm}); break;
158
159 case Op::splat: write(o, R{d}, "= splat", Splat{imm}); break;
160
161 case Op::add_f32: write(o, R{d}, "= add_f32", R{x}, R{y} ); break;
162 case Op::sub_f32: write(o, R{d}, "= sub_f32", R{x}, R{y} ); break;
163 case Op::mul_f32: write(o, R{d}, "= mul_f32", R{x}, R{y} ); break;
164 case Op::div_f32: write(o, R{d}, "= div_f32", R{x}, R{y} ); break;
165 case Op::mad_f32: write(o, R{d}, "= mad_f32", R{x}, R{y}, R{z}); break;
166
167 case Op::add_i32: write(o, R{d}, "= add_i32", R{x}, R{y}); break;
168 case Op::sub_i32: write(o, R{d}, "= sub_i32", R{x}, R{y}); break;
169 case Op::mul_i32: write(o, R{d}, "= mul_i32", R{x}, R{y}); break;
170
171 case Op::sub_i16x2: write(o, R{d}, "= sub_i16x2", R{x}, R{y}); break;
172 case Op::mul_i16x2: write(o, R{d}, "= mul_i16x2", R{x}, R{y}); break;
173 case Op::shr_i16x2: write(o, R{d}, "= shr_i16x2", R{x}, Shift{imm}); break;
174
175 case Op::bit_and : write(o, R{d}, "= bit_and" , R{x}, R{y}); break;
176 case Op::bit_or : write(o, R{d}, "= bit_or" , R{x}, R{y}); break;
177 case Op::bit_xor : write(o, R{d}, "= bit_xor" , R{x}, R{y}); break;
178 case Op::bit_clear: write(o, R{d}, "= bit_clear", R{x}, R{y}); break;
179
180 case Op::shl: write(o, R{d}, "= shl", R{x}, Shift{imm}); break;
181 case Op::shr: write(o, R{d}, "= shr", R{x}, Shift{imm}); break;
182 case Op::sra: write(o, R{d}, "= sra", R{x}, Shift{imm}); break;
183
184 case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{imm}, R{y}); break;
185 case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{imm}); break;
186
187 case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{imm}); break;
188
189 case Op::to_f32: write(o, R{d}, "= to_f32", R{x}); break;
190 case Op::to_i32: write(o, R{d}, "= to_i32", R{x}); break;
191 }
192 write(o, "\n");
193 }
194 }
195
Mike Klein62bccda2019-07-18 10:36:45 -0500196 static void dump(Builder& builder, SkWStream* o) {
197 skvm::Program program = builder.done();
198 dump_builder(builder, o);
Mike Kleinaab45b52019-07-02 15:39:23 -0500199 o->writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -0500200 dump_program(program, o);
Mike Kleinaab45b52019-07-02 15:39:23 -0500201 o->writeText("\n");
202 }
203
Mike Klein7e650762019-07-02 15:21:11 -0500204} // namespace
205
Mike Klein9977efa2019-07-15 12:22:36 -0500206template <typename Fn>
207static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
208 test((const skvm::Program&) program);
209 program.dropJIT();
210 test((const skvm::Program&) program);
211}
Mike Klein7e650762019-07-02 15:21:11 -0500212
Mike Klein68c50d02019-05-29 12:57:54 -0500213DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -0500214 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -0500215
216 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -0500217 for (int s = 0; s < 3; s++)
218 for (int d = 0; d < 3; d++) {
219 auto srcFmt = (Fmt)s,
220 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -0500221 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -0500222
Mike Klein267f5072019-06-03 16:27:46 -0500223 buf.writeText(fmt_name(srcFmt));
224 buf.writeText(" over ");
225 buf.writeText(fmt_name(dstFmt));
226 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -0500227 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -0500228 }
Mike Klein68c50d02019-05-29 12:57:54 -0500229
Mike Klein7b7077c2019-06-03 17:10:59 -0500230 // Write the I32 Srcovers also.
231 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500232 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -0500233 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -0500234 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -0500235 }
236 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500237 SrcoverBuilder_I32 builder;
Mike Klein7b7077c2019-06-03 17:10:59 -0500238 buf.writeText("I32 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -0500239 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -0500240 }
241 {
Mike Kleinaab45b52019-07-02 15:39:23 -0500242 SrcoverBuilder_I32_SWAR builder;
Mike Klein7b7077c2019-06-03 17:10:59 -0500243 buf.writeText("I32 (SWAR) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -0500244 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -0500245 }
246
Mike Klein267f5072019-06-03 16:27:46 -0500247 sk_sp<SkData> blob = buf.detachAsData();
248 {
249
250 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Mike Klein77163312019-06-04 13:35:32 -0500251 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
252 if (expected) {
253 if (blob->size() != expected->size()
254 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500255
Mike Klein77163312019-06-04 13:35:32 -0500256 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
257 expected->size(), expected->data(),
258 blob->size(), blob->data());
259 }
260
261 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
262 if (out.isValid()) {
263 out.write(blob->data(), blob->size());
264 }
Mike Klein68c50d02019-05-29 12:57:54 -0500265 }
266 }
267
Mike Klein9977efa2019-07-15 12:22:36 -0500268 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500269 uint32_t src[9];
270 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500271
Mike Klein9977efa2019-07-15 12:22:36 -0500272 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
273 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
274 src[i] = 0xbb007733;
275 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500276 }
Mike Klein9977efa2019-07-15 12:22:36 -0500277
278 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
279
280 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
281
282 // dst is probably 0xff2dad72.
283 for (auto got : dst) {
284 auto want = expected;
285 for (int i = 0; i < 4; i++) {
286 uint8_t d = got & 0xff,
287 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500288 if (abs(d-w) >= 2) {
289 SkDebugf("d %02x, w %02x\n", d,w);
290 }
Mike Klein9977efa2019-07-15 12:22:36 -0500291 REPORTER_ASSERT(r, abs(d-w) < 2);
292 got >>= 8;
293 want >>= 8;
294 }
295 }
296 });
Mike Klein3f593792019-06-12 12:54:52 -0500297 };
Mike Klein68c50d02019-05-29 12:57:54 -0500298
Mike Klein37607d42019-07-18 10:17:28 -0500299 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
300 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
301 test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
302 test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500303
Mike Klein9977efa2019-07-15 12:22:36 -0500304 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
305 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500306 uint32_t src[9];
307 uint8_t dst[SK_ARRAY_COUNT(src)];
308
309 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
310 src[i] = 0xbb007733;
311 dst[i] = 0x42;
312 }
313
314 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
315 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500316
317 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
318 SkGetPackedG32(over),
319 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500320 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500321
Mike Klein3f593792019-06-12 12:54:52 -0500322 for (auto got : dst) {
323 REPORTER_ASSERT(r, abs(got-want) < 3);
324 }
Mike Klein9977efa2019-07-15 12:22:36 -0500325 });
Mike Klein68c50d02019-05-29 12:57:54 -0500326
Mike Klein9977efa2019-07-15 12:22:36 -0500327 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
328 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500329 uint8_t src[256],
330 dst[256];
331 for (int i = 0; i < 256; i++) {
332 src[i] = 255 - i;
333 dst[i] = i;
334 }
335
336 program.eval(256, src, dst);
337
338 for (int i = 0; i < 256; i++) {
339 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
340 SkPackARGB32( i, 0,0,0)));
341 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
342 }
Mike Klein9977efa2019-07-15 12:22:36 -0500343 });
Mike Klein68c50d02019-05-29 12:57:54 -0500344}
Mike Klein81756e42019-06-12 11:36:28 -0500345
346DEF_TEST(SkVM_LoopCounts, r) {
347 // Make sure we cover all the exact N we want.
348
Mike Klein9977efa2019-07-15 12:22:36 -0500349 // buf[i] += 1
350 skvm::Builder b;
351 skvm::Arg arg = b.arg<int>();
352 b.store32(arg,
353 b.add(b.splat(1),
354 b.load32(arg)));
355
Mike Klein9e2218a2019-07-19 11:13:42 -0500356 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
357 int buf[64];
358 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500359 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
360 buf[i] = i;
361 }
362 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500363
Mike Klein9977efa2019-07-15 12:22:36 -0500364 for (int i = 0; i < N; i++) {
365 REPORTER_ASSERT(r, buf[i] == i+1);
366 }
367 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
368 REPORTER_ASSERT(r, buf[i] == i);
369 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500370 }
371 });
Mike Klein81756e42019-06-12 11:36:28 -0500372}
Mike Klein05642042019-06-18 12:16:06 -0500373
Mike Klein4a131192019-07-19 13:56:41 -0500374DEF_TEST(SkVM_mad, r) {
375 // This program is designed to exercise the tricky corners of instruction
376 // and register selection for Op::mad_f32.
377
378 skvm::Builder b;
379 {
380 skvm::Arg arg = b.arg<int>();
381
382 skvm::F32 x = b.to_f32(b.load32(arg)),
383 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
384 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
385 w = b.mad(z,z,y), // w can alias z but not y.
386 v = b.mad(w,y,w); // Got to stop somewhere.
387 b.store32(arg, b.to_i32(v));
388 }
389
390 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
391 int x = 2;
392 program.eval(1, &x);
393 // x = 2
394 // y = 2*2 + 2 = 6
395 // z = 6*6 + 2 = 38
396 // w = 38*38 + 6 = 1450
397 // v = 1450*6 + 1450 = 10150
398 REPORTER_ASSERT(r, x == 10150);
399 });
400}
401
Mike Kleinf98d0d32019-07-22 14:30:18 -0500402DEF_TEST(SkVM_hoist, r) {
403 // This program uses enough constants that it will fail to JIT if we hoist them.
404 // The JIT will try again without hoisting, and that'll just need 2 registers.
405 skvm::Builder b;
406 {
407 skvm::Arg arg = b.arg<int>();
408 skvm::I32 x = b.load32(arg);
409 for (int i = 0; i < 32; i++) {
410 x = b.add(x, b.splat(i));
411 }
412 b.store32(arg, x);
413 }
414
415 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
416 int x = 4;
417 program.eval(1, &x);
418 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
419 // x += 496
420 REPORTER_ASSERT(r, x == 500);
421 });
422}
423
Mike Klein05642042019-06-18 12:16:06 -0500424
Mike Klein05642042019-06-18 12:16:06 -0500425template <typename Fn>
426static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -0400427 uint8_t buf[4096];
428 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -0500429 fn(a);
430
431 REPORTER_ASSERT(r, a.size() == expected.size());
432
Mike Klein88c0a902019-06-24 15:34:02 -0400433 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -0500434 want = expected.begin();
435 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -0500436 REPORTER_ASSERT(r, got[i] == want[i],
437 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -0500438 }
439}
440
441DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -0500442 // Easiest way to generate test cases is
443 //
444 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
445 //
446 // The -x86-asm-syntax=intel bit is optional, controlling the
447 // input syntax only; the output will always be AT&T op x,y,dst style.
448 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
449 // that a bit easier to use here, despite maybe favoring AT&T overall.
450
451 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -0500452 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -0500453 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500454 a.vzeroupper();
455 a.ret();
456 },{
457 0xc5, 0xf8, 0x77,
458 0xc3,
459 });
460
Mike Klein237dbb42019-07-19 09:44:47 -0500461 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -0500462 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -0500463 a.ret();
464 a.align(4);
465 },{
466 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -0500467 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -0500468 });
Mike Klein61703a62019-06-18 15:01:12 -0500469
Mike Klein397fc882019-06-20 11:37:10 -0500470 test_asm(r, [&](A& a) {
471 a.add(A::rax, 8); // Always good to test rax.
472 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -0500473
Mike Klein397fc882019-06-20 11:37:10 -0500474 a.add(A::rdi, 12); // Last 0x48 REX
475 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -0500476
Mike Klein86a645c2019-07-12 12:29:39 -0500477 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -0500478 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -0500479
Mike Klein397fc882019-06-20 11:37:10 -0500480 a.add(A::rsi, 128); // Requires 4 byte immediate.
481 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -0500482 },{
Mike Kleind3e75a72019-06-18 15:26:08 -0500483 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -0500484 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -0500485
486 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -0500487 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -0500488
Mike Klein86a645c2019-07-12 12:29:39 -0500489 0x49, 0x83, 0b11'000'000, 0x07,
490 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -0500491
492 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -0500493 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -0500494 });
Mike Klein397fc882019-06-20 11:37:10 -0500495
496
497 test_asm(r, [&](A& a) {
498 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
499 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
500 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
501 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
502 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
503 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
504 },{
505 /* VEX */ /*op*/ /*modRM*/
506 0xc5, 0xf5, 0xfe, 0xc2,
507 0xc5, 0x75, 0xfe, 0xc2,
508 0xc5, 0xbd, 0xfe, 0xc2,
509 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
510 0xc4, 0xe2, 0x75, 0x40, 0xc2,
511 0xc5, 0xf5, 0xfa, 0xc2,
512 });
Mike Kleinff0ae812019-06-20 15:03:44 -0500513
514 test_asm(r, [&](A& a) {
515 a.vpsrld(A::ymm15, A::ymm2, 8);
516 a.vpsrld(A::ymm0 , A::ymm8, 5);
517 },{
518 0xc5, 0x85, 0x72,0xd2, 0x08,
519 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
520 });
521
522 test_asm(r, [&](A& a) {
523 a.vpermq(A::ymm1, A::ymm2, 5);
524 },{
525 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
526 });
Mike Kleine5053412019-06-21 12:37:22 -0500527
528 test_asm(r, [&](A& a) {
529 A::Label l = a.here();
530 a.byte(1);
531 a.byte(2);
532 a.byte(3);
533 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -0500534
Mike Klein65c10b52019-07-12 09:22:21 -0500535 a.vbroadcastss(A::ymm0 , &l);
536 a.vbroadcastss(A::ymm1 , &l);
537 a.vbroadcastss(A::ymm8 , &l);
538 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -0500539
Mike Klein65c10b52019-07-12 09:22:21 -0500540 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -0500541 },{
542 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -0500543
Mike Kleine5053412019-06-21 12:37:22 -0500544 /* VEX */ /*op*/ /* ModRM */ /* offset */
545 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
546 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
547 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
548 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -0500549
550 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Kleine5053412019-06-21 12:37:22 -0500551 });
Mike Klein060eaaa2019-06-21 14:42:09 -0500552
553 test_asm(r, [&](A& a) {
554 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500555 a.jne(&l);
556 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -0500557 a.je (&l);
558 a.jmp(&l);
559 a.jl (&l);
560
561 a.cmp(A::rdx, 0);
562 a.cmp(A::rax, 12);
563 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -0500564 },{
Mike Klein35b97c32019-07-12 12:32:45 -0500565 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
566 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
567 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
568 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
569 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
570
571 0x48,0x83,0xfa,0x00,
572 0x48,0x83,0xf8,0x0c,
573 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -0500574 });
Mike Klein120d9e82019-06-21 15:52:55 -0500575
576 test_asm(r, [&](A& a) {
577 a.vmovups(A::ymm5, A::rsi);
578 a.vmovups(A::rsi, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -0500579
580 a.vpmovzxbd(A::ymm4, A::rsi);
Mike Kleinf3881b22019-06-21 16:20:24 -0500581
582 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -0500583 },{
Mike Kleinae51aa32019-06-21 16:06:03 -0500584 /* VEX */ /*Op*/ /* ModRM */
585 0xc5, 0xfc, 0x10, 0b00'101'110,
586 0xc5, 0xfc, 0x11, 0b00'101'110,
587
588 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -0500589
590 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -0500591 });
Mike Klein2b7b2a22019-06-23 20:35:28 -0400592
593 test_asm(r, [&](A& a) {
Mike Klein35b97c32019-07-12 12:32:45 -0500594 a.movzbl(A::rax, A::rsi); // Low registers for src and dst.
595 a.movzbl(A::rax, A::r8); // High src register.
596 a.movzbl(A::r8 , A::rsi); // High dst register.
597
598 a.vmovd(A::rax, A::xmm0);
599 a.vmovd(A::rax, A::xmm8);
600 a.vmovd(A::r8, A::xmm0);
601
602 a.vmovd(A::xmm0, A::rax);
603 a.vmovd(A::xmm8, A::rax);
604 a.vmovd(A::xmm0, A::r8);
605
606 a.vmovd_direct(A::rax, A::xmm0);
607 a.vmovd_direct(A::rax, A::xmm8);
608 a.vmovd_direct(A::r8, A::xmm0);
609
610 a.vmovd_direct(A::xmm0, A::rax);
611 a.vmovd_direct(A::xmm8, A::rax);
612 a.vmovd_direct(A::xmm0, A::r8);
613
614 a.movb(A::rdx, A::rax);
615 a.movb(A::rdx, A::r8);
616 a.movb(A::r8 , A::rax);
617 },{
618 0x0f,0xb6,0x06,
619 0x41,0x0f,0xb6,0x00,
620 0x44,0x0f,0xb6,0x06,
621
622 0xc5,0xf9,0x7e,0x00,
623 0xc5,0x79,0x7e,0x00,
624 0xc4,0xc1,0x79,0x7e,0x00,
625
626 0xc5,0xf9,0x6e,0x00,
627 0xc5,0x79,0x6e,0x00,
628 0xc4,0xc1,0x79,0x6e,0x00,
629
630 0xc5,0xf9,0x7e,0xc0,
631 0xc5,0x79,0x7e,0xc0,
632 0xc4,0xc1,0x79,0x7e,0xc0,
633
634 0xc5,0xf9,0x6e,0xc0,
635 0xc5,0x79,0x6e,0xc0,
636 0xc4,0xc1,0x79,0x6e,0xc0,
637
638 0x88, 0x02,
639 0x44, 0x88, 0x02,
640 0x41, 0x88, 0x00,
641 });
642
643 test_asm(r, [&](A& a) {
644 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
645 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
646
647 a.vpextrb(A::rsi, A::xmm8, 7);
648 a.vpextrb(A::r8, A::xmm1, 15);
649 },{
650 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
651 0xc4,0x43,0x71, 0x20, 0x00, 12,
652
653 0xc4,0x63,0x79, 0x14, 0x06, 7,
654 0xc4,0xc3,0x79, 0x14, 0x08, 15,
655 });
656
657 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -0400658 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
659 },{
660 0xc5, 0x9d, 0xdf, 0xda,
661 });
Mike Klein9f4df802019-06-24 18:47:16 -0400662
663 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
664
665 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -0400666 a.and16b(A::v4, A::v3, A::v1);
667 a.orr16b(A::v4, A::v3, A::v1);
668 a.eor16b(A::v4, A::v3, A::v1);
669 a.bic16b(A::v4, A::v3, A::v1);
670
671 a.add4s(A::v4, A::v3, A::v1);
672 a.sub4s(A::v4, A::v3, A::v1);
673 a.mul4s(A::v4, A::v3, A::v1);
674
675 a.sub8h(A::v4, A::v3, A::v1);
676 a.mul8h(A::v4, A::v3, A::v1);
677
Mike Klein9f4df802019-06-24 18:47:16 -0400678 a.fadd4s(A::v4, A::v3, A::v1);
679 a.fsub4s(A::v4, A::v3, A::v1);
680 a.fmul4s(A::v4, A::v3, A::v1);
681 a.fdiv4s(A::v4, A::v3, A::v1);
682
Mike Klein65809142019-06-25 09:44:02 -0400683 a.fmla4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -0400684 },{
Mike Klein65809142019-06-25 09:44:02 -0400685 0x64,0x1c,0x21,0x4e,
686 0x64,0x1c,0xa1,0x4e,
687 0x64,0x1c,0x21,0x6e,
688 0x64,0x1c,0x61,0x4e,
689
690 0x64,0x84,0xa1,0x4e,
691 0x64,0x84,0xa1,0x6e,
692 0x64,0x9c,0xa1,0x4e,
693
694 0x64,0x84,0x61,0x6e,
695 0x64,0x9c,0x61,0x4e,
696
Mike Klein9f4df802019-06-24 18:47:16 -0400697 0x64,0xd4,0x21,0x4e,
698 0x64,0xd4,0xa1,0x4e,
699 0x64,0xdc,0x21,0x6e,
700 0x64,0xfc,0x21,0x6e,
701
Mike Klein65809142019-06-25 09:44:02 -0400702 0x64,0xcc,0x21,0x4e,
703 });
704
705 test_asm(r, [&](A& a) {
706 a.shl4s(A::v4, A::v3, 0);
707 a.shl4s(A::v4, A::v3, 1);
708 a.shl4s(A::v4, A::v3, 8);
709 a.shl4s(A::v4, A::v3, 16);
710 a.shl4s(A::v4, A::v3, 31);
711
712 a.sshr4s(A::v4, A::v3, 1);
713 a.sshr4s(A::v4, A::v3, 8);
714 a.sshr4s(A::v4, A::v3, 31);
715
716 a.ushr4s(A::v4, A::v3, 1);
717 a.ushr4s(A::v4, A::v3, 8);
718 a.ushr4s(A::v4, A::v3, 31);
719
720 a.ushr8h(A::v4, A::v3, 1);
721 a.ushr8h(A::v4, A::v3, 8);
722 a.ushr8h(A::v4, A::v3, 15);
723 },{
724 0x64,0x54,0x20,0x4f,
725 0x64,0x54,0x21,0x4f,
726 0x64,0x54,0x28,0x4f,
727 0x64,0x54,0x30,0x4f,
728 0x64,0x54,0x3f,0x4f,
729
730 0x64,0x04,0x3f,0x4f,
731 0x64,0x04,0x38,0x4f,
732 0x64,0x04,0x21,0x4f,
733
734 0x64,0x04,0x3f,0x6f,
735 0x64,0x04,0x38,0x6f,
736 0x64,0x04,0x21,0x6f,
737
738 0x64,0x04,0x1f,0x6f,
739 0x64,0x04,0x18,0x6f,
740 0x64,0x04,0x11,0x6f,
741 });
742
743 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -0500744 a.sli4s(A::v4, A::v3, 0);
745 a.sli4s(A::v4, A::v3, 1);
746 a.sli4s(A::v4, A::v3, 8);
747 a.sli4s(A::v4, A::v3, 16);
748 a.sli4s(A::v4, A::v3, 31);
749 },{
750 0x64,0x54,0x20,0x6f,
751 0x64,0x54,0x21,0x6f,
752 0x64,0x54,0x28,0x6f,
753 0x64,0x54,0x30,0x6f,
754 0x64,0x54,0x3f,0x6f,
755 });
756
757 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -0400758 a.scvtf4s (A::v4, A::v3);
759 a.fcvtzs4s(A::v4, A::v3);
760 },{
761 0x64,0xd8,0x21,0x4e,
762 0x64,0xb8,0xa1,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -0400763 });
Mike Klein15a368d2019-06-26 10:21:12 -0400764
765 test_asm(r, [&](A& a) {
766 a.ret(A::x30); // Conventional ret using link register.
767 a.ret(A::x13); // Can really return using any register if we like.
768
769 a.add(A::x2, A::x2, 4);
770 a.add(A::x3, A::x2, 32);
771
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500772 a.sub(A::x2, A::x2, 4);
773 a.sub(A::x3, A::x2, 32);
774
Mike Klein15a368d2019-06-26 10:21:12 -0400775 a.subs(A::x2, A::x2, 4);
776 a.subs(A::x3, A::x2, 32);
777
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500778 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
779 a.cmp(A::x2, 4);
780
Mike Klein15a368d2019-06-26 10:21:12 -0400781 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -0500782 a.bne(&l);
783 a.bne(&l);
784 a.blt(&l);
785 a.b(&l);
786 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -0500787 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -0400788 },{
789 0xc0,0x03,0x5f,0xd6,
790 0xa0,0x01,0x5f,0xd6,
791
792 0x42,0x10,0x00,0x91,
793 0x43,0x80,0x00,0x91,
794
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500795 0x42,0x10,0x00,0xd1,
796 0x43,0x80,0x00,0xd1,
797
Mike Klein15a368d2019-06-26 10:21:12 -0400798 0x42,0x10,0x00,0xf1,
799 0x43,0x80,0x00,0xf1,
800
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500801 0x5f,0x10,0x00,0xf1,
802 0x5f,0x10,0x00,0xf1,
803
804 0x01,0x00,0x00,0x54, // b.ne #0
805 0xe1,0xff,0xff,0x54, // b.ne #-4
806 0xcb,0xff,0xff,0x54, // b.lt #-8
807 0xae,0xff,0xff,0x54, // b.al #-12
808 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
809 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -0400810 });
Mike Kleine51632e2019-06-26 14:47:43 -0400811
Mike Kleince7b88c2019-07-11 14:06:40 -0500812 // Can we cbz() to a not-yet-defined label?
813 test_asm(r, [&](A& a) {
814 A::Label l;
815 a.cbz(A::x2, &l);
816 a.add(A::x3, A::x2, 32);
817 a.label(&l);
818 a.ret(A::x30);
819 },{
820 0x42,0x00,0x00,0xb4, // cbz x2, #8
821 0x43,0x80,0x00,0x91, // add x3, x2, #32
822 0xc0,0x03,0x5f,0xd6, // ret
823 });
824
825 // If we start a label as a backward label,
826 // can we redefine it to be a future label?
827 // (Not sure this is useful... just want to test it works.)
828 test_asm(r, [&](A& a) {
829 A::Label l1 = a.here();
830 a.add(A::x3, A::x2, 32);
831 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
832
833 A::Label l2 = a.here(); // Start off the same...
834 a.add(A::x3, A::x2, 32);
835 a.cbz(A::x2, &l2); // Looks like this will go backward...
836 a.add(A::x2, A::x2, 4);
837 a.add(A::x3, A::x2, 32);
838 a.label(&l2); // But no... actually forward! What a switcheroo!
839 },{
840 0x43,0x80,0x00,0x91, // add x3, x2, #32
841 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
842
843 0x43,0x80,0x00,0x91, // add x3, x2, #32
844 0x62,0x00,0x00,0xb4, // cbz x2, #12
845 0x42,0x10,0x00,0x91, // add x2, x2, #4
846 0x43,0x80,0x00,0x91, // add x3, x2, #32
847 });
848
Mike Kleine51632e2019-06-26 14:47:43 -0400849 test_asm(r, [&](A& a) {
850 a.ldrq(A::v0, A::x8);
851 a.strq(A::v0, A::x8);
852 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500853 0x00,0x01,0xc0,0x3d,
854 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -0400855 });
Mike Klein1fa149a2019-07-01 11:18:08 -0500856
857 test_asm(r, [&](A& a) {
858 a.xtns2h(A::v0, A::v0);
859 a.xtnh2b(A::v0, A::v0);
860 a.strs (A::v0, A::x0);
861
862 a.ldrs (A::v0, A::x0);
863 a.uxtlb2h(A::v0, A::v0);
864 a.uxtlh2s(A::v0, A::v0);
865 },{
866 0x00,0x28,0x61,0x0e,
867 0x00,0x28,0x21,0x0e,
868 0x00,0x00,0x00,0xbd,
869
870 0x00,0x00,0x40,0xbd,
871 0x00,0xa4,0x08,0x2f,
872 0x00,0xa4,0x10,0x2f,
873 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -0500874
875 test_asm(r, [&](A& a) {
876 a.ldrb(A::v0, A::x8);
877 a.strb(A::v0, A::x8);
878 },{
879 0x00,0x01,0x40,0x3d,
880 0x00,0x01,0x00,0x3d,
881 });
Mike Klein05642042019-06-18 12:16:06 -0500882}