blob: d90901096d1ad36d2f67fe412577a962a2211cf2 [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Klein10fc1e62020-04-13 11:57:05 -050036static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
Mike Klein10fc1e62020-04-13 11:57:05 -050037 if (program.hasJIT()) {
Mike Kleinb5a30762019-10-16 10:11:56 -050038 test((const skvm::Program&) program);
39 program.dropJIT();
40 }
Mike Klein10fc1e62020-04-13 11:57:05 -050041 test((const skvm::Program&) program);
Mike Kleinb5a30762019-10-16 10:11:56 -050042}
43
44
Mike Klein68c50d02019-05-29 12:57:54 -050045DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050046 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050047
48 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050049 for (int s = 0; s < 3; s++)
50 for (int d = 0; d < 3; d++) {
51 auto srcFmt = (Fmt)s,
52 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050053 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050054
Mike Klein267f5072019-06-03 16:27:46 -050055 buf.writeText(fmt_name(srcFmt));
56 buf.writeText(" over ");
57 buf.writeText(fmt_name(dstFmt));
58 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050059 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050060 }
Mike Klein68c50d02019-05-29 12:57:54 -050061
Mike Klein7b7077c2019-06-03 17:10:59 -050062 // Write the I32 Srcovers also.
63 {
Mike Kleinaab45b52019-07-02 15:39:23 -050064 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050065 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050066 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050067 }
Mike Klein7b7077c2019-06-03 17:10:59 -050068
Mike Kleinf9963112019-08-08 15:13:25 -040069 {
Mike Kleind48488b2019-10-22 12:27:58 -050070 // Demonstrate the value of program reordering.
71 skvm::Builder b;
72 skvm::Arg sp = b.varying<int>(),
73 dp = b.varying<int>();
74
75 skvm::I32 byte = b.splat(0xff);
76
77 skvm::I32 src = b.load32(sp),
78 sr = b.extract(src, 0, byte),
79 sg = b.extract(src, 8, byte),
80 sb = b.extract(src, 16, byte),
81 sa = b.extract(src, 24, byte);
82
83 skvm::I32 dst = b.load32(dp),
84 dr = b.extract(dst, 0, byte),
85 dg = b.extract(dst, 8, byte),
86 db = b.extract(dst, 16, byte),
87 da = b.extract(dst, 24, byte);
88
89 skvm::I32 R = b.add(sr, dr),
90 G = b.add(sg, dg),
91 B = b.add(sb, db),
92 A = b.add(sa, da);
93
94 skvm::I32 rg = b.pack(R, G, 8),
95 ba = b.pack(B, A, 8),
96 rgba = b.pack(rg, ba, 16);
97
98 b.store32(dp, rgba);
99
100 dump(b, &buf);
101 }
102
Mike Klein238105b2020-03-04 17:05:32 -0600103 // Our checked in dump expectations assume we have FMA support.
Mike Klein10fc1e62020-04-13 11:57:05 -0500104 if (skvm::fma_supported()) {
Ben Wagnere8ffb082020-05-04 10:50:08 -0400105 sk_sp<SkData> actual = buf.detachAsData();
106 bool writeActualAsNewExpectation = false;
Mike Klein238105b2020-03-04 17:05:32 -0600107 {
Mike Klein238105b2020-03-04 17:05:32 -0600108 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Ben Wagnere8ffb082020-05-04 10:50:08 -0400109 if (!expected) {
110 ERRORF(r, "Couldn't load SkVMTest.expected.");
111 writeActualAsNewExpectation = true;
Mike Klein267f5072019-06-03 16:27:46 -0500112
Ben Wagnere8ffb082020-05-04 10:50:08 -0400113 } else if (!expected->equals(actual.get())) {
114 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
Adlai Holler684838f2020-05-12 10:41:04 -0400115 (int)expected->size(), expected->data(),
116 (int)actual->size(), actual->data());
Ben Wagnere8ffb082020-05-04 10:50:08 -0400117 writeActualAsNewExpectation = true;
118 }
119 }
120 if (writeActualAsNewExpectation) {
121 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
122 if (out.isValid()) {
123 out.write(actual->data(), actual->size());
Mike Klein77163312019-06-04 13:35:32 -0500124 }
Mike Klein68c50d02019-05-29 12:57:54 -0500125 }
126 }
127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500129 uint32_t src[9];
130 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500131
Mike Klein10fc1e62020-04-13 11:57:05 -0500132 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500133 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
134 src[i] = 0xbb007733;
135 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500136 }
Mike Klein9977efa2019-07-15 12:22:36 -0500137
138 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
139
140 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
141
142 // dst is probably 0xff2dad72.
143 for (auto got : dst) {
144 auto want = expected;
145 for (int i = 0; i < 4; i++) {
146 uint8_t d = got & 0xff,
147 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500148 if (abs(d-w) >= 2) {
149 SkDebugf("d %02x, w %02x\n", d,w);
150 }
Mike Klein9977efa2019-07-15 12:22:36 -0500151 REPORTER_ASSERT(r, abs(d-w) < 2);
152 got >>= 8;
153 want >>= 8;
154 }
155 }
156 });
Mike Klein3f593792019-06-12 12:54:52 -0500157 };
Mike Klein68c50d02019-05-29 12:57:54 -0500158
Mike Klein37607d42019-07-18 10:17:28 -0500159 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
160 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500161
Mike Klein10fc1e62020-04-13 11:57:05 -0500162 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500163 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500164 uint32_t src[9];
165 uint8_t dst[SK_ARRAY_COUNT(src)];
166
167 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
168 src[i] = 0xbb007733;
169 dst[i] = 0x42;
170 }
171
172 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
173 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500174
175 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
176 SkGetPackedG32(over),
177 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500178 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500179
Mike Klein3f593792019-06-12 12:54:52 -0500180 for (auto got : dst) {
181 REPORTER_ASSERT(r, abs(got-want) < 3);
182 }
Mike Klein9977efa2019-07-15 12:22:36 -0500183 });
Mike Klein68c50d02019-05-29 12:57:54 -0500184
Mike Klein10fc1e62020-04-13 11:57:05 -0500185 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500186 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500187 uint8_t src[256],
188 dst[256];
189 for (int i = 0; i < 256; i++) {
190 src[i] = 255 - i;
191 dst[i] = i;
192 }
193
194 program.eval(256, src, dst);
195
196 for (int i = 0; i < 256; i++) {
197 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
198 SkPackARGB32( i, 0,0,0)));
199 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
200 }
Mike Klein9977efa2019-07-15 12:22:36 -0500201 });
Mike Klein68c50d02019-05-29 12:57:54 -0500202}
Mike Klein81756e42019-06-12 11:36:28 -0500203
Mike Klein7542ab52020-04-02 08:50:16 -0500204DEF_TEST(SkVM_eliminate_dead_code, r) {
205 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400206 {
Mike Klein7542ab52020-04-02 08:50:16 -0500207 skvm::Arg arg = b.varying<int>();
208 skvm::I32 l = b.load32(arg);
209 skvm::I32 a = b.add(l, l);
210 b.add(a, b.splat(7));
211 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400212
Mike Klein7542ab52020-04-02 08:50:16 -0500213 std::vector<skvm::Instruction> program = b.program();
214 REPORTER_ASSERT(r, program.size() == 4);
215
Mike Klein5b701e12020-04-02 10:34:24 -0500216 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500217 REPORTER_ASSERT(r, program.size() == 0);
218}
219
Mike Klein9fdadb92019-07-30 12:30:13 -0500220DEF_TEST(SkVM_Pointless, r) {
221 // Let's build a program with no memory arguments.
222 // It should all be pegged as dead code, but we should be able to "run" it.
223 skvm::Builder b;
224 {
225 b.add(b.splat(5.0f),
226 b.splat(4.0f));
227 }
228
Mike Klein10fc1e62020-04-13 11:57:05 -0500229 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500230 for (int N = 0; N < 64; N++) {
231 program.eval(N);
232 }
233 });
234
Mike Kleined9b1f12020-02-06 13:02:32 -0600235 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500236 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500237 }
238}
239
Mike Klein10fc1e62020-04-13 11:57:05 -0500240DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600241 skvm::Builder b;
242 b.store32(b.varying<int>(), b.splat(42));
243
Mike Klein10fc1e62020-04-13 11:57:05 -0500244 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
245 int buf[18];
246 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -0600247
Mike Klein10fc1e62020-04-13 11:57:05 -0500248 p.eval(17, buf);
249 for (int i = 0; i < 17; i++) {
250 REPORTER_ASSERT(r, buf[i] == 42);
251 }
252 REPORTER_ASSERT(r, buf[17] == 47);
253 });
Mike Kleinb6149312020-02-26 13:04:23 -0600254}
Mike Klein11efa182020-02-27 12:04:37 -0600255
Mike Klein10fc1e62020-04-13 11:57:05 -0500256DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -0600257 skvm::Builder b;
258 {
259 auto src = b.varying<int>(),
260 dst = b.varying<int>();
261 b.store32(dst, b.load32(src));
262 }
263
Mike Klein10fc1e62020-04-13 11:57:05 -0500264 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
265 int src[] = {1,2,3,4,5,6,7,8,9},
266 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -0600267
Mike Klein10fc1e62020-04-13 11:57:05 -0500268 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
269 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
270 REPORTER_ASSERT(r, dst[i] == src[i]);
271 }
272 size_t i = SK_ARRAY_COUNT(src)-1;
273 REPORTER_ASSERT(r, dst[i] == 0);
274 });
Mike Klein11efa182020-02-27 12:04:37 -0600275}
Mike Kleinb6149312020-02-26 13:04:23 -0600276
Mike Klein81756e42019-06-12 11:36:28 -0500277DEF_TEST(SkVM_LoopCounts, r) {
278 // Make sure we cover all the exact N we want.
279
Mike Klein9977efa2019-07-15 12:22:36 -0500280 // buf[i] += 1
281 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500282 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500283 b.store32(arg,
284 b.add(b.splat(1),
285 b.load32(arg)));
286
Mike Klein10fc1e62020-04-13 11:57:05 -0500287 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500288 int buf[64];
289 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500290 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
291 buf[i] = i;
292 }
293 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500294
Mike Klein9977efa2019-07-15 12:22:36 -0500295 for (int i = 0; i < N; i++) {
296 REPORTER_ASSERT(r, buf[i] == i+1);
297 }
298 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
299 REPORTER_ASSERT(r, buf[i] == i);
300 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500301 }
302 });
Mike Klein81756e42019-06-12 11:36:28 -0500303}
Mike Klein05642042019-06-18 12:16:06 -0500304
Mike Kleinb2b6a992020-01-13 16:34:30 -0600305DEF_TEST(SkVM_gather32, r) {
306 skvm::Builder b;
307 {
308 skvm::Arg uniforms = b.uniform(),
309 buf = b.varying<int>();
310 skvm::I32 x = b.load32(buf);
311 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
312 }
313
Mike Klein10fc1e62020-04-13 11:57:05 -0500314 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600315 const int img[] = {12,34,56,78, 90,98,76,54};
316
317 int buf[20];
318 for (int i = 0; i < 20; i++) {
319 buf[i] = i;
320 }
321
322 struct Uniforms {
323 const int* img;
324 } uniforms{img};
325
326 program.eval(20, &uniforms, buf);
327 int i = 0;
328 REPORTER_ASSERT(r, buf[i] == 12); i++;
329 REPORTER_ASSERT(r, buf[i] == 34); i++;
330 REPORTER_ASSERT(r, buf[i] == 56); i++;
331 REPORTER_ASSERT(r, buf[i] == 78); i++;
332 REPORTER_ASSERT(r, buf[i] == 90); i++;
333 REPORTER_ASSERT(r, buf[i] == 98); i++;
334 REPORTER_ASSERT(r, buf[i] == 76); i++;
335 REPORTER_ASSERT(r, buf[i] == 54); i++;
336
337 REPORTER_ASSERT(r, buf[i] == 12); i++;
338 REPORTER_ASSERT(r, buf[i] == 34); i++;
339 REPORTER_ASSERT(r, buf[i] == 56); i++;
340 REPORTER_ASSERT(r, buf[i] == 78); i++;
341 REPORTER_ASSERT(r, buf[i] == 90); i++;
342 REPORTER_ASSERT(r, buf[i] == 98); i++;
343 REPORTER_ASSERT(r, buf[i] == 76); i++;
344 REPORTER_ASSERT(r, buf[i] == 54); i++;
345
346 REPORTER_ASSERT(r, buf[i] == 12); i++;
347 REPORTER_ASSERT(r, buf[i] == 34); i++;
348 REPORTER_ASSERT(r, buf[i] == 56); i++;
349 REPORTER_ASSERT(r, buf[i] == 78); i++;
350 });
351}
352
Mike Klein81d52672019-07-30 11:11:09 -0500353DEF_TEST(SkVM_gathers, r) {
354 skvm::Builder b;
355 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600356 skvm::Arg uniforms = b.uniform(),
357 buf32 = b.varying<int>(),
358 buf16 = b.varying<uint16_t>(),
359 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500360
361 skvm::I32 x = b.load32(buf32);
362
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600363 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
364 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
365 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500366 }
367
Mike Klein10fc1e62020-04-13 11:57:05 -0500368 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500369 const int img[] = {12,34,56,78, 90,98,76,54};
370
371 constexpr int N = 20;
372 int buf32[N];
373 uint16_t buf16[N];
374 uint8_t buf8 [N];
375
376 for (int i = 0; i < 20; i++) {
377 buf32[i] = i;
378 }
379
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600380 struct Uniforms {
381 const int* img;
382 } uniforms{img};
383
384 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500385 int i = 0;
386 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
387 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
388 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
389 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
390 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
391 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
392 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
393 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
394
395 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
396 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
397 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
398 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
399 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
400 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
401 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
402 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
403
404 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
405 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
406 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
407 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
408 });
409}
410
Mike Klein21e85eb2020-04-17 13:57:13 -0500411DEF_TEST(SkVM_gathers2, r) {
412 skvm::Builder b;
413 {
414 skvm::Arg uniforms = b.uniform(),
415 buf32 = b.varying<int>(),
416 buf16 = b.varying<uint16_t>(),
417 buf8 = b.varying<uint8_t>();
418
419 skvm::I32 x = b.load32(buf32);
420
421 b.store32(buf32, b.gather32(uniforms,0, x));
422 b.store16(buf16, b.gather16(uniforms,0, x));
423 b.store8 (buf8 , b.gather8 (uniforms,0, x));
424 }
425
426 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
427 uint8_t img[256];
428 for (int i = 0; i < 256; i++) {
429 img[i] = i;
430 }
431
432 int buf32[64];
433 uint16_t buf16[64];
434 uint8_t buf8 [64];
435
436 for (int i = 0; i < 64; i++) {
437 buf32[i] = (i*47)&63;
438 buf16[i] = 0;
439 buf8 [i] = 0;
440 }
441
442 struct Uniforms {
443 const uint8_t* img;
444 } uniforms{img};
445
446 program.eval(64, &uniforms, buf32, buf16, buf8);
447
448 for (int i = 0; i < 64; i++) {
449 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
450 }
451
452 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
453 REPORTER_ASSERT(r, buf16[63] == 0x2322);
454
455 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
456 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
457 });
458}
459
Mike Klein81d52672019-07-30 11:11:09 -0500460DEF_TEST(SkVM_bitops, r) {
461 skvm::Builder b;
462 {
463 skvm::Arg ptr = b.varying<int>();
464
465 skvm::I32 x = b.load32(ptr);
466
Mike Klein4067a942020-04-05 10:25:32 -0500467 x = b.bit_and (x, b.splat(0xf1)); // 0x40
468 x = b.bit_or (x, b.splat(0x80)); // 0xc0
469 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
470 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500471
472 x = b.shl(x, 28); // 0xe000'0000
473 x = b.sra(x, 28); // 0xffff'fffe
474 x = b.shr(x, 1); // 0x7fff'ffff
475
476 b.store32(ptr, x);
477 }
478
Mike Klein10fc1e62020-04-13 11:57:05 -0500479 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500480 int x = 0x42;
481 program.eval(1, &x);
482 REPORTER_ASSERT(r, x == 0x7fff'ffff);
483 });
484}
485
Mike Klein4067a942020-04-05 10:25:32 -0500486DEF_TEST(SkVM_select_is_NaN, r) {
487 skvm::Builder b;
488 {
489 skvm::Arg src = b.varying<float>(),
490 dst = b.varying<float>();
491
492 skvm::F32 x = b.loadF(src);
493 x = select(is_NaN(x), b.splat(0.0f)
494 , x);
495 b.storeF(dst, x);
496 }
497
498 std::vector<skvm::OptimizedInstruction> program = b.optimize();
499 REPORTER_ASSERT(r, program.size() == 4);
500 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
501 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
502 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
503 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
504
Mike Klein10fc1e62020-04-13 11:57:05 -0500505 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500506 // ±NaN, ±0, ±1, ±inf
507 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
508 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
509 uint32_t dst[SK_ARRAY_COUNT(src)];
510 program.eval(SK_ARRAY_COUNT(src), src, dst);
511
512 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
513 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
514 }
515 });
516}
517
Mike Klein81d52672019-07-30 11:11:09 -0500518DEF_TEST(SkVM_f32, r) {
519 skvm::Builder b;
520 {
521 skvm::Arg arg = b.varying<float>();
522
Mike Reedf5ff4c22020-03-23 14:57:53 -0400523 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500524 y = b.add(x,x), // y = 2x
525 z = b.sub(y,x), // z = 2x-x = x
526 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400527 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500528 }
529
Mike Klein10fc1e62020-04-13 11:57:05 -0500530 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500531 float buf[] = { 1,2,3,4,5,6,7,8,9 };
532 program.eval(SK_ARRAY_COUNT(buf), buf);
533 for (float v : buf) {
534 REPORTER_ASSERT(r, v == 1.0f);
535 }
536 });
537}
538
539DEF_TEST(SkVM_cmp_i32, r) {
540 skvm::Builder b;
541 {
542 skvm::I32 x = b.load32(b.varying<int>());
543
544 auto to_bit = [&](int shift, skvm::I32 mask) {
545 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
546 };
547
548 skvm::I32 m = b.splat(0);
549 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
550 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
551 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
552 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
553 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
554 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
555
556 b.store32(b.varying<int>(), m);
557 }
Mike Klein10fc1e62020-04-13 11:57:05 -0500558 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500559 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
560 int out[SK_ARRAY_COUNT(in)];
561
562 program.eval(SK_ARRAY_COUNT(in), in, out);
563
564 REPORTER_ASSERT(r, out[0] == 0b001111);
565 REPORTER_ASSERT(r, out[1] == 0b001100);
566 REPORTER_ASSERT(r, out[2] == 0b001010);
567 REPORTER_ASSERT(r, out[3] == 0b001010);
568 REPORTER_ASSERT(r, out[4] == 0b000010);
569 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
570 REPORTER_ASSERT(r, out[i] == 0b110010);
571 }
572 });
573}
574
575DEF_TEST(SkVM_cmp_f32, r) {
576 skvm::Builder b;
577 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400578 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500579
580 auto to_bit = [&](int shift, skvm::I32 mask) {
581 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
582 };
583
584 skvm::I32 m = b.splat(0);
585 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
586 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
587 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
588 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
589 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
590 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
591
592 b.store32(b.varying<int>(), m);
593 }
594
Mike Klein10fc1e62020-04-13 11:57:05 -0500595 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500596 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
597 int out[SK_ARRAY_COUNT(in)];
598
599 program.eval(SK_ARRAY_COUNT(in), in, out);
600
601 REPORTER_ASSERT(r, out[0] == 0b001111);
602 REPORTER_ASSERT(r, out[1] == 0b001100);
603 REPORTER_ASSERT(r, out[2] == 0b001010);
604 REPORTER_ASSERT(r, out[3] == 0b001010);
605 REPORTER_ASSERT(r, out[4] == 0b000010);
606 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
607 REPORTER_ASSERT(r, out[i] == 0b110010);
608 }
609 });
610}
611
Mike Klein14548b92020-02-28 14:02:29 -0600612DEF_TEST(SkVM_index, r) {
613 skvm::Builder b;
614 b.store32(b.varying<int>(), b.index());
615
Mike Klein10fc1e62020-04-13 11:57:05 -0500616 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600617 int buf[23];
618 program.eval(SK_ARRAY_COUNT(buf), buf);
619 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
620 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
621 }
622 });
623}
624
Mike Klein4a131192019-07-19 13:56:41 -0500625DEF_TEST(SkVM_mad, r) {
626 // This program is designed to exercise the tricky corners of instruction
627 // and register selection for Op::mad_f32.
628
629 skvm::Builder b;
630 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500631 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500632
Mike Kleincac130f2020-09-25 14:47:44 -0500633 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein4a131192019-07-19 13:56:41 -0500634 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
635 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
636 w = b.mad(z,z,y), // w can alias z but not y.
637 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600638 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500639 }
640
Mike Klein10fc1e62020-04-13 11:57:05 -0500641 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500642 int x = 2;
643 program.eval(1, &x);
644 // x = 2
645 // y = 2*2 + 2 = 6
646 // z = 6*6 + 2 = 38
647 // w = 38*38 + 6 = 1450
648 // v = 1450*6 + 1450 = 10150
649 REPORTER_ASSERT(r, x == 10150);
650 });
651}
652
Mike Klein7c0332c2020-03-05 14:18:04 -0600653DEF_TEST(SkVM_fms, r) {
654 // Create a pattern that can be peepholed into an Op::fms_f32.
655 skvm::Builder b;
656 {
657 skvm::Arg arg = b.varying<int>();
658
Mike Kleincac130f2020-09-25 14:47:44 -0500659 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600660 v = b.sub(b.mul(x, b.splat(2.0f)),
661 b.splat(1.0f));
662 b.store32(arg, b.trunc(v));
663 }
664
Mike Klein10fc1e62020-04-13 11:57:05 -0500665 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600666 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
667 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
668
669 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
670 REPORTER_ASSERT(r, buf[i] = 2*i-1);
671 }
672 });
673}
674
675DEF_TEST(SkVM_fnma, r) {
676 // Create a pattern that can be peepholed into an Op::fnma_f32.
677 skvm::Builder b;
678 {
679 skvm::Arg arg = b.varying<int>();
680
Mike Kleincac130f2020-09-25 14:47:44 -0500681 skvm::F32 x = b.to_F32(b.load32(arg)),
Mike Klein7c0332c2020-03-05 14:18:04 -0600682 v = b.sub(b.splat(1.0f),
683 b.mul(x, b.splat(2.0f)));
684 b.store32(arg, b.trunc(v));
685 }
686
Mike Klein10fc1e62020-04-13 11:57:05 -0500687 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600688 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
689 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
690
691 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
692 REPORTER_ASSERT(r, buf[i] = 1-2*i);
693 }
694 });
695}
696
Mike Klein81d52672019-07-30 11:11:09 -0500697DEF_TEST(SkVM_madder, r) {
698 skvm::Builder b;
699 {
700 skvm::Arg arg = b.varying<float>();
701
Mike Reedf5ff4c22020-03-23 14:57:53 -0400702 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500703 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
704 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
705 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400706 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500707 }
708
Mike Klein10fc1e62020-04-13 11:57:05 -0500709 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500710 float x = 2.0f;
711 // y = 2*2 + 2 = 6
712 // z = 6*2 + 6 = 18
713 // w = 6*6 + 18 = 54
714 program.eval(1, &x);
715 REPORTER_ASSERT(r, x == 54.0f);
716 });
717}
718
Mike Kleinf22faaf2020-01-09 07:27:39 -0600719DEF_TEST(SkVM_floor, r) {
720 skvm::Builder b;
721 {
722 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400723 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600724 }
725
Mike Klein10fc1e62020-04-13 11:57:05 -0500726 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600727 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
728 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
729 program.eval(SK_ARRAY_COUNT(buf), buf);
730 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
731 REPORTER_ASSERT(r, buf[i] == want[i]);
732 }
733 });
734}
735
Mike Klein5caf7de2020-03-12 11:05:46 -0500736DEF_TEST(SkVM_round, r) {
737 skvm::Builder b;
738 {
739 skvm::Arg src = b.varying<float>();
740 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400741 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500742 }
743
744 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
745 // We haven't explicitly guaranteed that here... it just probably is.
Mike Klein10fc1e62020-04-13 11:57:05 -0500746 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500747 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
748 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
749 int dst[SK_ARRAY_COUNT(buf)];
750
751 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
752 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
753 REPORTER_ASSERT(r, dst[i] == want[i]);
754 }
755 });
756}
757
Herb Derbyc02a41f2020-02-28 14:25:45 -0600758DEF_TEST(SkVM_min, r) {
759 skvm::Builder b;
760 {
761 skvm::Arg src1 = b.varying<float>();
762 skvm::Arg src2 = b.varying<float>();
763 skvm::Arg dst = b.varying<float>();
764
Mike Reedf5ff4c22020-03-23 14:57:53 -0400765 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600766 }
767
Mike Klein10fc1e62020-04-13 11:57:05 -0500768 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600769 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
770 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
771 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
772 float d[SK_ARRAY_COUNT(s1)];
773 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
774 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
775 REPORTER_ASSERT(r, d[i] == want[i]);
776 }
777 });
778}
779
780DEF_TEST(SkVM_max, r) {
781 skvm::Builder b;
782 {
783 skvm::Arg src1 = b.varying<float>();
784 skvm::Arg src2 = b.varying<float>();
785 skvm::Arg dst = b.varying<float>();
786
Mike Reedf5ff4c22020-03-23 14:57:53 -0400787 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600788 }
789
Mike Klein10fc1e62020-04-13 11:57:05 -0500790 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600791 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
792 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
793 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
794 float d[SK_ARRAY_COUNT(s1)];
795 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
796 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
797 REPORTER_ASSERT(r, d[i] == want[i]);
798 }
799 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600800}
801
Mike Kleinf98d0d32019-07-22 14:30:18 -0500802DEF_TEST(SkVM_hoist, r) {
803 // This program uses enough constants that it will fail to JIT if we hoist them.
804 // The JIT will try again without hoisting, and that'll just need 2 registers.
805 skvm::Builder b;
806 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500807 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500808 skvm::I32 x = b.load32(arg);
809 for (int i = 0; i < 32; i++) {
810 x = b.add(x, b.splat(i));
811 }
812 b.store32(arg, x);
813 }
814
Mike Klein10fc1e62020-04-13 11:57:05 -0500815 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500816 int x = 4;
817 program.eval(1, &x);
818 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
819 // x += 496
820 REPORTER_ASSERT(r, x == 500);
821 });
822}
823
Mike Kleinb9944122019-08-02 12:22:39 -0500824DEF_TEST(SkVM_select, r) {
825 skvm::Builder b;
826 {
827 skvm::Arg buf = b.varying<int>();
828
829 skvm::I32 x = b.load32(buf);
830
831 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
832
833 b.store32(buf, x);
834 }
835
Mike Klein10fc1e62020-04-13 11:57:05 -0500836 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500837 int buf[] = { 0,1,2,3,4,5,6,7,8 };
838 program.eval(SK_ARRAY_COUNT(buf), buf);
839 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
840 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
841 }
842 });
843}
844
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500845DEF_TEST(SkVM_NewOps, r) {
846 // Exercise a somewhat arbitrary set of new ops.
847 skvm::Builder b;
848 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500849 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500850 uniforms = b.uniform();
851
852 skvm::I32 x = b.load16(buf);
853
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600854 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500855
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600856 x = b.add(x, b.uniform32(uniforms, kPtr+0));
Mike Klein8b16bee2020-11-25 10:54:02 -0600857 x = b.mul(x, b.uniform32(uniforms, kPtr+4));
858 x = b.sub(x, b.uniform32(uniforms, kPtr+8));
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600859
Mike Klein8b16bee2020-11-25 10:54:02 -0600860 skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500861 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
862 x = b.select(b.gt(x, limit ), limit , x);
863
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600864 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500865
866 b.store16(buf, x);
867 }
868
869 if ((false)) {
870 SkDynamicMemoryWStream buf;
871 dump(b, &buf);
872 sk_sp<SkData> blob = buf.detachAsData();
873 SkDebugf("%.*s\n", blob->size(), blob->data());
874 }
875
Mike Klein10fc1e62020-04-13 11:57:05 -0500876 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500877 const int N = 31;
878 int16_t buf[N];
879 for (int i = 0; i < N; i++) {
880 buf[i] = i;
881 }
882
883 const int M = 16;
884 uint8_t img[M];
885 for (int i = 0; i < M; i++) {
886 img[i] = i*i;
887 }
888
889 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600890 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500891 int add = 5;
Mike Klein8b16bee2020-11-25 10:54:02 -0600892 int mul = 3;
893 int sub = 18;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500894 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600895 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500896
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600897 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500898
899 for (int i = 0; i < N; i++) {
900 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
901 int x = 3*(i-1);
902
903 // Then that's pinned to the limits of img.
904 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
905 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
906 REPORTER_ASSERT(r, buf[i] == img[x]);
907 }
908 });
909}
910
Mike Klein5a8404c2020-02-28 14:24:56 -0600911DEF_TEST(SkVM_sqrt, r) {
912 skvm::Builder b;
913 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400914 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600915
Mike Klein10fc1e62020-04-13 11:57:05 -0500916 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600917 constexpr int K = 17;
918 float buf[K];
919 for (int i = 0; i < K; i++) {
920 buf[i] = (float)(i*i);
921 }
922
923 // x^2 -> x
924 program.eval(K, buf);
925
926 for (int i = 0; i < K; i++) {
927 REPORTER_ASSERT(r, buf[i] == (float)i);
928 }
929 });
930}
931
Mike Klein3f7c8652019-11-07 10:33:56 -0600932DEF_TEST(SkVM_MSAN, r) {
933 // This little memset32() program should be able to JIT, but if we run that
934 // JIT code in an MSAN build, it won't see the writes initialize buf. So
935 // this tests that we're using the interpreter instead.
936 skvm::Builder b;
937 b.store32(b.varying<int>(), b.splat(42));
938
Mike Klein10fc1e62020-04-13 11:57:05 -0500939 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600940 constexpr int K = 17;
941 int buf[K]; // Intentionally uninitialized.
942 program.eval(K, buf);
943 sk_msan_assert_initialized(buf, buf+K);
944 for (int x : buf) {
945 REPORTER_ASSERT(r, x == 42);
946 }
947 });
948}
949
Mike Klein13601172019-11-08 15:01:02 -0600950DEF_TEST(SkVM_assert, r) {
951 skvm::Builder b;
952 b.assert_true(b.lt(b.load32(b.varying<int>()),
953 b.splat(42)));
954
Mike Klein10fc1e62020-04-13 11:57:05 -0500955 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600956 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600957 program.eval(SK_ARRAY_COUNT(buf), buf);
958 });
959}
960
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600961DEF_TEST(SkVM_premul, reporter) {
962 // Test that premul is short-circuited when alpha is known opaque.
963 {
964 skvm::Builder p;
965 auto rptr = p.varying<int>(),
966 aptr = p.varying<int>();
967
Mike Reedf5ff4c22020-03-23 14:57:53 -0400968 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600969 g = p.splat(0.0f),
970 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400971 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600972
973 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400974 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600975
976 // load red, load alpha, red *= alpha, store red
977 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
978 }
979
980 {
981 skvm::Builder p;
982 auto rptr = p.varying<int>();
983
Mike Reedf5ff4c22020-03-23 14:57:53 -0400984 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600985 g = p.splat(0.0f),
986 b = p.splat(0.0f),
987 a = p.splat(1.0f);
988
989 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400990 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600991
992 // load red, store red
993 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
994 }
995
996 // Same deal for unpremul.
997 {
998 skvm::Builder p;
999 auto rptr = p.varying<int>(),
1000 aptr = p.varying<int>();
1001
Mike Reedf5ff4c22020-03-23 14:57:53 -04001002 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001003 g = p.splat(0.0f),
1004 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001005 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001006
1007 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001008 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001009
1010 // load red, load alpha, a bunch of unpremul instructions, store red
1011 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1012 }
1013
1014 {
1015 skvm::Builder p;
1016 auto rptr = p.varying<int>();
1017
Mike Reedf5ff4c22020-03-23 14:57:53 -04001018 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001019 g = p.splat(0.0f),
1020 b = p.splat(0.0f),
1021 a = p.splat(1.0f);
1022
1023 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001024 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001025
1026 // load red, store red
1027 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1028 }
1029}
Mike Klein05642042019-06-18 12:16:06 -05001030
Mike Klein05642042019-06-18 12:16:06 -05001031template <typename Fn>
1032static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001033 uint8_t buf[4096];
1034 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001035 fn(a);
1036
1037 REPORTER_ASSERT(r, a.size() == expected.size());
1038
Mike Klein88c0a902019-06-24 15:34:02 -04001039 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001040 want = expected.begin();
1041 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001042 REPORTER_ASSERT(r, got[i] == want[i],
1043 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001044 }
1045}
1046
1047DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001048 // Easiest way to generate test cases is
1049 //
1050 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1051 //
1052 // The -x86-asm-syntax=intel bit is optional, controlling the
1053 // input syntax only; the output will always be AT&T op x,y,dst style.
1054 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1055 // that a bit easier to use here, despite maybe favoring AT&T overall.
1056
1057 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001058 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001059 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001060 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001061 a.vzeroupper();
1062 a.ret();
1063 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001064 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001065 0xc5, 0xf8, 0x77,
1066 0xc3,
1067 });
1068
Mike Klein237dbb42019-07-19 09:44:47 -05001069 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001070 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001071 a.ret();
1072 a.align(4);
1073 },{
1074 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001075 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001076 });
Mike Klein61703a62019-06-18 15:01:12 -05001077
Mike Klein397fc882019-06-20 11:37:10 -05001078 test_asm(r, [&](A& a) {
1079 a.add(A::rax, 8); // Always good to test rax.
1080 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001081
Mike Klein397fc882019-06-20 11:37:10 -05001082 a.add(A::rdi, 12); // Last 0x48 REX
1083 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001084
Mike Klein86a645c2019-07-12 12:29:39 -05001085 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001086 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001087
Mike Klein397fc882019-06-20 11:37:10 -05001088 a.add(A::rsi, 128); // Requires 4 byte immediate.
1089 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001090
1091 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1092 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1093 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
Mike Klein68d075e2020-07-28 09:26:51 -05001094 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
Mike Kleinc15c9362020-04-16 11:10:36 -05001095 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
Mike Klein68d075e2020-07-28 09:26:51 -05001096 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
1097 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
Mike Kleinc15c9362020-04-16 11:10:36 -05001098 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1099 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1100 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1101
1102 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1103
1104 a.add( A::rax , A::rcx); // addq %rcx, %rax
1105 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1106 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1107 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1108
1109 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001110 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001111 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001112 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001113
1114 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001115 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001116
Mike Klein86a645c2019-07-12 12:29:39 -05001117 0x49, 0x83, 0b11'000'000, 0x07,
1118 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001119
1120 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001121 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001122
1123 0x48,0x83,0x06,0x07,
1124 0x48,0x83,0x46,0x0c,0x07,
1125 0x48,0x83,0x44,0x24,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001126 0x49,0x83,0x44,0x24,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001127 0x48,0x83,0x44,0x84,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001128 0x49,0x83,0x44,0x84,0x0c,0x07,
1129 0x4a,0x83,0x44,0xa0,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001130 0x4b,0x83,0x44,0x43,0x0c,0x07,
1131 0x49,0x83,0x44,0x03,0x0c,0x07,
1132 0x4a,0x83,0x44,0x18,0x0c,0x07,
1133
1134 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1135
1136 0x48,0x01,0xc8,
1137 0x48,0x01,0x08,
1138 0x48,0x01,0x48,0x0c,
1139 0x48,0x03,0x48,0x0c,
1140 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001141 });
Mike Klein397fc882019-06-20 11:37:10 -05001142
1143
1144 test_asm(r, [&](A& a) {
1145 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1146 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1147 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1148 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1149 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1150 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1151 },{
1152 /* VEX */ /*op*/ /*modRM*/
1153 0xc5, 0xf5, 0xfe, 0xc2,
1154 0xc5, 0x75, 0xfe, 0xc2,
1155 0xc5, 0xbd, 0xfe, 0xc2,
1156 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1157 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1158 0xc5, 0xf5, 0xfa, 0xc2,
1159 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001160
1161 test_asm(r, [&](A& a) {
Mike Klein84dd8f92020-09-15 07:57:27 -05001162 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1163 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1164 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1165 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1166
1167 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1168 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1169 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1170 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1171
1172 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1173 a.vpabsw (A::ymm4, A::ymm3);
1174 a.vpsllw (A::ymm4, A::ymm3, 12);
1175 a.vpsraw (A::ymm4, A::ymm3, 12);
1176 },{
1177 0xc5, 0xe5, 0xfd, 0xe2,
1178 0xc5, 0xe5, 0xe3, 0xe2,
1179 0xc5, 0xe5, 0x75, 0xe2,
1180 0xc5, 0xe5, 0x65, 0xe2,
1181
1182 0xc5, 0xe5, 0xea, 0xe2,
1183 0xc5, 0xe5, 0xee, 0xe2,
1184 0xc4,0xe2,0x65, 0x3a, 0xe2,
1185 0xc4,0xe2,0x65, 0x3e, 0xe2,
1186
1187 0xc4,0xe2,0x65, 0x0b, 0xe2,
1188 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1189 0xc5,0xdd,0x71, 0xf3, 0x0c,
1190 0xc5,0xdd,0x71, 0xe3, 0x0c,
1191 });
1192
1193 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001194 A::Label l;
1195 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001196 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1197 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1198 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1199 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1200 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1201 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001202 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001203 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001204 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001205 0xc5,0xf5,0x76,0xc2,
1206 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001207 0xc5,0xf4,0xc2,0xc2,0x00,
1208 0xc5,0xf4,0xc2,0xc2,0x01,
1209 0xc5,0xf4,0xc2,0xc2,0x02,
1210 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001211 });
1212
1213 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001214 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1215 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1216 },{
1217 0xc5,0xf4,0x5d,0xc2,
1218 0xc5,0xf4,0x5f,0xc2,
1219 });
1220
1221 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001222 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1223 },{
1224 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1225 });
1226
1227 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001228 a.vpsrld(A::ymm15, A::ymm2, 8);
1229 a.vpsrld(A::ymm0 , A::ymm8, 5);
1230 },{
1231 0xc5, 0x85, 0x72,0xd2, 0x08,
1232 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1233 });
1234
1235 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001236 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001237 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001238 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001239 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001240 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001241 },{
Mike Klein184f6012020-07-22 13:17:29 -05001242 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001243 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001244 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1245 });
Mike Kleine5053412019-06-21 12:37:22 -05001246
1247 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001248 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1249 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1250 },{
1251 0xc5,0xed,0x62,0x0f,
1252 0xc5,0xed,0x6a,0xcb,
1253 });
1254
1255 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001256 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1257 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1258 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1259 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1260 },{
1261 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1262 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1263 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1264 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1265 });
1266
1267 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001268 A::Label l;
1269 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001270 a.byte(1);
1271 a.byte(2);
1272 a.byte(3);
1273 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001274
Mike Klein65c10b52019-07-12 09:22:21 -05001275 a.vbroadcastss(A::ymm0 , &l);
1276 a.vbroadcastss(A::ymm1 , &l);
1277 a.vbroadcastss(A::ymm8 , &l);
1278 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001279
Mike Klein65c10b52019-07-12 09:22:21 -05001280 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001281 a.vpaddd (A::ymm4, A::ymm3, &l);
1282 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001283
1284 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001285
1286 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001287 },{
1288 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001289
Mike Kleine5053412019-06-21 12:37:22 -05001290 /* VEX */ /*op*/ /* ModRM */ /* offset */
1291 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1292 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1293 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1294 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001295
1296 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001297
1298 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1299 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001300
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001301 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1302
1303 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffaf == -82
Mike Kleine5053412019-06-21 12:37:22 -05001304 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001305
1306 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001307 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1308 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1309 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1310 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001311
1312 a.vbroadcastss(A::ymm8, A::xmm0);
1313 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001314 },{
1315 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1316 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1317 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1318 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1319 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001320
1321 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1322 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001323 });
1324
1325 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001326 A::Label l;
1327 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001328 a.jne(&l);
1329 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001330 a.je (&l);
1331 a.jmp(&l);
1332 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001333 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001334
Mike Kleinc15c9362020-04-16 11:10:36 -05001335 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001336 a.cmp(A::rax, 12);
1337 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001338 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001339 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1340 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1341 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1342 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1343 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001344 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001345
Mike Kleinc15c9362020-04-16 11:10:36 -05001346 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001347 0x48,0x83,0xf8,0x0c,
1348 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001349 });
Mike Klein120d9e82019-06-21 15:52:55 -05001350
1351 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001352 a.vmovups(A::ymm5, A::Mem{A::rsi});
1353 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001354
Mike Klein400ba222020-06-30 15:54:19 -05001355 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001356 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001357
Mike Kleinedc2dac2020-04-15 16:18:27 -05001358 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1359 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001360
Mike Klein8390f2e2020-04-15 17:03:08 -05001361 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001362 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001363 /* VEX */ /*Op*/ /* ModRM */
1364 0xc5, 0xfc, 0x10, 0b00'101'110,
1365 0xc5, 0xfc, 0x11, 0b00'101'110,
1366
Mike Klein400ba222020-06-30 15:54:19 -05001367 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001368 0xc5, 0xf8, 0x11, 0b00'101'110,
1369
Mike Klein52010b72019-08-02 11:18:00 -05001370 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001371 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001372
1373 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001374 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001375
1376 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001377 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1378 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1379 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001380
Mike Kleinedc2dac2020-04-15 16:18:27 -05001381 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1382 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1383 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001384 },{
1385 0xc5,0xfc,0x10,0x2c,0x24,
1386 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1387 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1388
1389 0xc5,0xfc,0x11,0x2c,0x24,
1390 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1391 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1392 });
1393
1394 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001395 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1396 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1397 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1398 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1399 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001400
Mike Kleinc15c9362020-04-16 11:10:36 -05001401 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1402 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1403 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1404 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1405 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001406
Mike Klein8390f2e2020-04-15 17:03:08 -05001407 a.vmovd(A::Mem{A::rax}, A::xmm0);
1408 a.vmovd(A::Mem{A::rax}, A::xmm8);
1409 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1410
1411 a.vmovd(A::xmm0, A::Mem{A::rax});
1412 a.vmovd(A::xmm8, A::Mem{A::rax});
1413 a.vmovd(A::xmm0, A::Mem{A::r8 });
1414
1415 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1416 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1417 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1418
Mike Klein35b97c32019-07-12 12:32:45 -05001419 a.vmovd(A::rax, A::xmm0);
1420 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001421 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001422
1423 a.vmovd(A::xmm0, A::rax);
1424 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001425 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001426
Mike Kleinc15c9362020-04-16 11:10:36 -05001427 a.movb(A::Mem{A::rdx}, A::rax);
1428 a.movb(A::Mem{A::rdx}, A::r8 );
1429 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001430
Mike Kleinc15c9362020-04-16 11:10:36 -05001431 a.movb(A::rdx, A::Mem{A::rax});
1432 a.movb(A::rdx, A::Mem{A::r8 });
1433 a.movb(A::r8 , A::Mem{A::rax});
1434
1435 a.movb(A::rdx, 12);
1436 a.movb(A::rax, 4);
1437 a.movb(A::r8 , -1);
1438
1439 a.movb(A::Mem{A::rdx}, 12);
1440 a.movb(A::Mem{A::rax}, 4);
1441 a.movb(A::Mem{A::r8 }, -1);
1442 },{
1443 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1444 0x49,0x0f,0xb6,0x00,
1445 0x4c,0x0f,0xb6,0x06,
1446 0x4c,0x0f,0xb6,0x46, 12,
1447 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1448
1449 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1450 0x49,0x0f,0xb7,0x00,
1451 0x4c,0x0f,0xb7,0x06,
1452 0x4c,0x0f,0xb7,0x46, 12,
1453 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001454
Mike Klein35b97c32019-07-12 12:32:45 -05001455 0xc5,0xf9,0x7e,0x00,
1456 0xc5,0x79,0x7e,0x00,
1457 0xc4,0xc1,0x79,0x7e,0x00,
1458
1459 0xc5,0xf9,0x6e,0x00,
1460 0xc5,0x79,0x6e,0x00,
1461 0xc4,0xc1,0x79,0x6e,0x00,
1462
Mike Klein93d3fab2020-01-14 10:46:44 -06001463 0xc5,0xf9,0x6e,0x04,0x88,
1464 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1465 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1466
Mike Klein35b97c32019-07-12 12:32:45 -05001467 0xc5,0xf9,0x7e,0xc0,
1468 0xc5,0x79,0x7e,0xc0,
1469 0xc4,0xc1,0x79,0x7e,0xc0,
1470
1471 0xc5,0xf9,0x6e,0xc0,
1472 0xc5,0x79,0x6e,0xc0,
1473 0xc4,0xc1,0x79,0x6e,0xc0,
1474
Mike Kleinc15c9362020-04-16 11:10:36 -05001475 0x48 ,0x88, 0x02,
1476 0x4c, 0x88, 0x02,
1477 0x49, 0x88, 0x00,
1478
1479 0x48 ,0x8a, 0x10,
1480 0x49, 0x8a, 0x10,
1481 0x4c, 0x8a, 0x00,
1482
1483 0x48, 0xc6, 0xc2, 0x0c,
1484 0x48, 0xc6, 0xc0, 0x04,
1485 0x49, 0xc6, 0xc0, 0xff,
1486
1487 0x48, 0xc6, 0x02, 0x0c,
1488 0x48, 0xc6, 0x00, 0x04,
1489 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001490 });
1491
1492 test_asm(r, [&](A& a) {
Mike Klein4ecc9702020-07-30 10:03:10 -05001493 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1494 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8;
1495
Mike Klein8390f2e2020-04-15 17:03:08 -05001496 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1497 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinrsw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001498
Mike Klein8390f2e2020-04-15 17:03:08 -05001499 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
Mike Klein4ecc9702020-07-30 10:03:10 -05001500 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001501
Mike Klein21e85eb2020-04-17 13:57:13 -05001502 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1503 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1504
1505 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1506 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1507
Mike Klein8390f2e2020-04-15 17:03:08 -05001508 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1509 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001510
Mike Klein8390f2e2020-04-15 17:03:08 -05001511 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1512 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001513 },{
Mike Klein4ecc9702020-07-30 10:03:10 -05001514 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1515 0xc4,0x43,0x71, 0x22, 0x00, 3,
1516
Mike Klein52010b72019-08-02 11:18:00 -05001517 0xc5,0xb9, 0xc4, 0x0e, 4,
1518 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1519
Mike Klein35b97c32019-07-12 12:32:45 -05001520 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1521 0xc4,0x43,0x71, 0x20, 0x00, 12,
1522
Mike Klein21e85eb2020-04-17 13:57:13 -05001523 0xc4,0x63,0x7d,0x39,0xc1, 1,
1524 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1525
1526 0xc4,0x63,0x79,0x16,0x06, 3,
1527 0xc4,0xc3,0x79,0x16,0x08, 2,
1528
Mike Klein95529e82019-08-02 11:43:43 -05001529 0xc4,0x63,0x79, 0x15, 0x06, 7,
1530 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1531
Mike Klein35b97c32019-07-12 12:32:45 -05001532 0xc4,0x63,0x79, 0x14, 0x06, 7,
1533 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1534 });
1535
1536 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001537 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1538 },{
1539 0xc5, 0x9d, 0xdf, 0xda,
1540 });
Mike Klein9f4df802019-06-24 18:47:16 -04001541
Mike Kleind4546d62019-07-30 12:15:40 -05001542 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001543 A::Label l;
1544 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1545
1546 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1547 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1548 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1549
1550 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1551 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1552
1553 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1554 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1555 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1556 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1557 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1558
1559 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1560 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1561 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1562
Mike Kleind4546d62019-07-30 12:15:40 -05001563 a.vcvttps2dq(A::ymm3, A::ymm2);
1564 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001565 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001566 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001567 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001568 },{
1569 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001570
1571 0xc5,0xfd,0x6f,0x1e,
1572 0xc5,0xfd,0x6f,0x1c,0x24,
1573 0xc4,0xc1,0x7d,0x6f,0x1b,
1574
1575 0xc5,0xfd,0x6f,0x5e,0x04,
1576 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1577
1578 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1579 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1580 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1581 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1582 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1583
1584 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1585 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1586
1587 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1588
Mike Kleind4546d62019-07-30 12:15:40 -05001589 0xc5,0xfe,0x5b,0xda,
1590 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001591 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001592 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001593 });
1594
Mike Kleinbeaa1082020-01-13 14:04:18 -06001595 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001596 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1597 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1598
1599 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1600 a.vcvtph2ps(A::ymm2, A::xmm3);
1601 },{
1602 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1603 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1604
1605 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1606 0xc4,0xe2,0x7d,0x13,0xd3,
1607 });
1608
1609 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001610 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1611 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1612 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1613 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1614 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1615 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1616 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1617 },{
1618 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1619 0xc4,0xe2,0x75,0x92,0x04,0x10,
1620 0xc4,0x62,0x75,0x92,0x14,0x10,
1621 0xc4,0xa2,0x75,0x92,0x04,0x20,
1622 0xc4,0xc2,0x75,0x92,0x04,0x11,
1623 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1624 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1625 });
1626
Mike Kleinc322f632020-01-13 16:18:58 -06001627 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001628 a.mov(A::rax, A::Mem{A::rdi, 0});
1629 a.mov(A::rax, A::Mem{A::rdi, 1});
1630 a.mov(A::rax, A::Mem{A::rdi, 512});
1631 a.mov(A::r15, A::Mem{A::r13, 42});
1632 a.mov(A::rax, A::Mem{A::r13, 42});
1633 a.mov(A::r15, A::Mem{A::rax, 42});
1634 a.mov(A::rax, 1);
1635 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001636 },{
1637 0x48, 0x8b, 0x07,
1638 0x48, 0x8b, 0x47, 0x01,
1639 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1640 0x4d, 0x8b, 0x7d, 0x2a,
1641 0x49, 0x8b, 0x45, 0x2a,
1642 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001643 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1644 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001645 });
1646
Mike Klein9f4df802019-06-24 18:47:16 -04001647 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1648
1649 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001650 a.and16b(A::v4, A::v3, A::v1);
1651 a.orr16b(A::v4, A::v3, A::v1);
1652 a.eor16b(A::v4, A::v3, A::v1);
1653 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001654 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001655 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001656
1657 a.add4s(A::v4, A::v3, A::v1);
1658 a.sub4s(A::v4, A::v3, A::v1);
1659 a.mul4s(A::v4, A::v3, A::v1);
1660
Mike Klein97afd2e2019-10-16 14:11:27 -05001661 a.cmeq4s(A::v4, A::v3, A::v1);
1662 a.cmgt4s(A::v4, A::v3, A::v1);
1663
Mike Klein65809142019-06-25 09:44:02 -04001664 a.sub8h(A::v4, A::v3, A::v1);
1665 a.mul8h(A::v4, A::v3, A::v1);
1666
Mike Klein9f4df802019-06-24 18:47:16 -04001667 a.fadd4s(A::v4, A::v3, A::v1);
1668 a.fsub4s(A::v4, A::v3, A::v1);
1669 a.fmul4s(A::v4, A::v3, A::v1);
1670 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001671 a.fmin4s(A::v4, A::v3, A::v1);
1672 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein8d78da92020-11-25 13:53:20 -06001673
1674 a.fneg4s (A::v4, A::v3);
1675 a.fsqrt4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001676
Mike Klein65809142019-06-25 09:44:02 -04001677 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001678 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001679
1680 a.fcmeq4s(A::v4, A::v3, A::v1);
1681 a.fcmgt4s(A::v4, A::v3, A::v1);
1682 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001683 },{
Mike Klein65809142019-06-25 09:44:02 -04001684 0x64,0x1c,0x21,0x4e,
1685 0x64,0x1c,0xa1,0x4e,
1686 0x64,0x1c,0x21,0x6e,
1687 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001688 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001689 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001690
1691 0x64,0x84,0xa1,0x4e,
1692 0x64,0x84,0xa1,0x6e,
1693 0x64,0x9c,0xa1,0x4e,
1694
Mike Klein97afd2e2019-10-16 14:11:27 -05001695 0x64,0x8c,0xa1,0x6e,
1696 0x64,0x34,0xa1,0x4e,
1697
Mike Klein65809142019-06-25 09:44:02 -04001698 0x64,0x84,0x61,0x6e,
1699 0x64,0x9c,0x61,0x4e,
1700
Mike Klein9f4df802019-06-24 18:47:16 -04001701 0x64,0xd4,0x21,0x4e,
1702 0x64,0xd4,0xa1,0x4e,
1703 0x64,0xdc,0x21,0x6e,
1704 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001705 0x64,0xf4,0xa1,0x4e,
1706 0x64,0xf4,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001707
Mike Klein7c0332c2020-03-05 14:18:04 -06001708 0x64,0xf8,0xa0,0x6e,
Mike Klein8d78da92020-11-25 13:53:20 -06001709 0x64,0xf8,0xa1,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001710
Mike Klein65809142019-06-25 09:44:02 -04001711 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001712 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001713
1714 0x64,0xe4,0x21,0x4e,
1715 0x64,0xe4,0xa1,0x6e,
1716 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001717 });
1718
1719 test_asm(r, [&](A& a) {
1720 a.shl4s(A::v4, A::v3, 0);
1721 a.shl4s(A::v4, A::v3, 1);
1722 a.shl4s(A::v4, A::v3, 8);
1723 a.shl4s(A::v4, A::v3, 16);
1724 a.shl4s(A::v4, A::v3, 31);
1725
1726 a.sshr4s(A::v4, A::v3, 1);
1727 a.sshr4s(A::v4, A::v3, 8);
1728 a.sshr4s(A::v4, A::v3, 31);
1729
1730 a.ushr4s(A::v4, A::v3, 1);
1731 a.ushr4s(A::v4, A::v3, 8);
1732 a.ushr4s(A::v4, A::v3, 31);
1733
1734 a.ushr8h(A::v4, A::v3, 1);
1735 a.ushr8h(A::v4, A::v3, 8);
1736 a.ushr8h(A::v4, A::v3, 15);
1737 },{
1738 0x64,0x54,0x20,0x4f,
1739 0x64,0x54,0x21,0x4f,
1740 0x64,0x54,0x28,0x4f,
1741 0x64,0x54,0x30,0x4f,
1742 0x64,0x54,0x3f,0x4f,
1743
1744 0x64,0x04,0x3f,0x4f,
1745 0x64,0x04,0x38,0x4f,
1746 0x64,0x04,0x21,0x4f,
1747
1748 0x64,0x04,0x3f,0x6f,
1749 0x64,0x04,0x38,0x6f,
1750 0x64,0x04,0x21,0x6f,
1751
1752 0x64,0x04,0x1f,0x6f,
1753 0x64,0x04,0x18,0x6f,
1754 0x64,0x04,0x11,0x6f,
1755 });
1756
1757 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001758 a.sli4s(A::v4, A::v3, 0);
1759 a.sli4s(A::v4, A::v3, 1);
1760 a.sli4s(A::v4, A::v3, 8);
1761 a.sli4s(A::v4, A::v3, 16);
1762 a.sli4s(A::v4, A::v3, 31);
1763 },{
1764 0x64,0x54,0x20,0x6f,
1765 0x64,0x54,0x21,0x6f,
1766 0x64,0x54,0x28,0x6f,
1767 0x64,0x54,0x30,0x6f,
1768 0x64,0x54,0x3f,0x6f,
1769 });
1770
1771 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001772 a.scvtf4s (A::v4, A::v3);
1773 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001774 a.fcvtns4s(A::v4, A::v3);
Mike Klein8d78da92020-11-25 13:53:20 -06001775 a.frintp4s(A::v4, A::v3);
1776 a.frintm4s(A::v4, A::v3);
Mike Kleinec255632020-12-03 10:25:31 -06001777 a.fcvtn (A::v4, A::v3);
1778 a.fcvtl (A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001779 },{
1780 0x64,0xd8,0x21,0x4e,
1781 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001782 0x64,0xa8,0x21,0x4e,
Mike Klein8d78da92020-11-25 13:53:20 -06001783 0x64,0x88,0xa1,0x4e,
1784 0x64,0x98,0x21,0x4e,
Mike Kleinec255632020-12-03 10:25:31 -06001785 0x64,0x68,0x21,0x0e,
1786 0x64,0x78,0x21,0x0e,
Mike Klein9f4df802019-06-24 18:47:16 -04001787 });
Mike Klein15a368d2019-06-26 10:21:12 -04001788
1789 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001790 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1791 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1792 a.strq(A::v1, A::sp); // str q1, [sp]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001793 a.strd(A::v0, A::sp, 6); // str d0, [sp, #48]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001794 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001795 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001796 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1797 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001798 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001799 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
Mike Kleindbc19ea2020-11-18 13:32:14 -06001800 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
Mike Kleinb8e041e2020-04-17 11:30:29 -05001801 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1802 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001803 },{
1804 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001805 0xe0,0x07,0x80,0x3d,
1806 0xe1,0x03,0x80,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001807 0xe0,0x1b,0x00,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001808 0xe0,0x1b,0x00,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001809 0xe0,0x2b,0x00,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001810 0xe0,0xbf,0x00,0x3d,
1811 0xe9,0xab,0x40,0x3d,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001812 0xe9,0xbf,0x40,0x7d,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001813 0xe7,0x2b,0x40,0xbd,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001814 0xe7,0x07,0x40,0xfd,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001815 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001816 0xff,0x83,0x00,0x91,
1817 });
1818
1819 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001820 a.brk(0);
1821 a.brk(65535);
1822
Mike Klein15a368d2019-06-26 10:21:12 -04001823 a.ret(A::x30); // Conventional ret using link register.
1824 a.ret(A::x13); // Can really return using any register if we like.
1825
1826 a.add(A::x2, A::x2, 4);
1827 a.add(A::x3, A::x2, 32);
1828
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001829 a.sub(A::x2, A::x2, 4);
1830 a.sub(A::x3, A::x2, 32);
1831
Mike Klein15a368d2019-06-26 10:21:12 -04001832 a.subs(A::x2, A::x2, 4);
1833 a.subs(A::x3, A::x2, 32);
1834
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001835 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1836 a.cmp(A::x2, 4);
1837
Mike Kleinc74db792020-05-11 11:57:12 -05001838 A::Label l;
1839 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001840 a.bne(&l);
1841 a.bne(&l);
1842 a.blt(&l);
1843 a.b(&l);
1844 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001845 a.cbz(A::x2, &l);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001846
1847 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1848 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
Mike Klein15a368d2019-06-26 10:21:12 -04001849 },{
Mike Klein37be7712019-11-13 13:19:01 -06001850 0x00,0x00,0x20,0xd4,
1851 0xe0,0xff,0x3f,0xd4,
1852
Mike Klein15a368d2019-06-26 10:21:12 -04001853 0xc0,0x03,0x5f,0xd6,
1854 0xa0,0x01,0x5f,0xd6,
1855
1856 0x42,0x10,0x00,0x91,
1857 0x43,0x80,0x00,0x91,
1858
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001859 0x42,0x10,0x00,0xd1,
1860 0x43,0x80,0x00,0xd1,
1861
Mike Klein15a368d2019-06-26 10:21:12 -04001862 0x42,0x10,0x00,0xf1,
1863 0x43,0x80,0x00,0xf1,
1864
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001865 0x5f,0x10,0x00,0xf1,
1866 0x5f,0x10,0x00,0xf1,
1867
1868 0x01,0x00,0x00,0x54, // b.ne #0
1869 0xe1,0xff,0xff,0x54, // b.ne #-4
1870 0xcb,0xff,0xff,0x54, // b.lt #-8
1871 0xae,0xff,0xff,0x54, // b.al #-12
1872 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1873 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Kleindbc19ea2020-11-18 13:32:14 -06001874
1875 0x43,0x00,0x01,0x8b,
1876 0x43,0x0c,0x81,0x8b,
Mike Klein15a368d2019-06-26 10:21:12 -04001877 });
Mike Kleine51632e2019-06-26 14:47:43 -04001878
Mike Kleince7b88c2019-07-11 14:06:40 -05001879 // Can we cbz() to a not-yet-defined label?
1880 test_asm(r, [&](A& a) {
1881 A::Label l;
1882 a.cbz(A::x2, &l);
1883 a.add(A::x3, A::x2, 32);
1884 a.label(&l);
1885 a.ret(A::x30);
1886 },{
1887 0x42,0x00,0x00,0xb4, // cbz x2, #8
1888 0x43,0x80,0x00,0x91, // add x3, x2, #32
1889 0xc0,0x03,0x5f,0xd6, // ret
1890 });
1891
1892 // If we start a label as a backward label,
1893 // can we redefine it to be a future label?
1894 // (Not sure this is useful... just want to test it works.)
1895 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001896 A::Label l1;
1897 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001898 a.add(A::x3, A::x2, 32);
1899 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1900
Mike Kleinc74db792020-05-11 11:57:12 -05001901 A::Label l2; // Start off the same...
1902 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001903 a.add(A::x3, A::x2, 32);
1904 a.cbz(A::x2, &l2); // Looks like this will go backward...
1905 a.add(A::x2, A::x2, 4);
1906 a.add(A::x3, A::x2, 32);
1907 a.label(&l2); // But no... actually forward! What a switcheroo!
1908 },{
1909 0x43,0x80,0x00,0x91, // add x3, x2, #32
1910 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1911
1912 0x43,0x80,0x00,0x91, // add x3, x2, #32
1913 0x62,0x00,0x00,0xb4, // cbz x2, #12
1914 0x42,0x10,0x00,0x91, // add x2, x2, #4
1915 0x43,0x80,0x00,0x91, // add x3, x2, #32
1916 });
1917
Mike Klein81d52672019-07-30 11:11:09 -05001918 // Loading from a label on ARM.
1919 test_asm(r, [&](A& a) {
1920 A::Label fore,aft;
1921 a.label(&fore);
1922 a.word(0x01234567);
1923 a.ldrq(A::v1, &fore);
1924 a.ldrq(A::v2, &aft);
1925 a.label(&aft);
1926 a.word(0x76543210);
1927 },{
1928 0x67,0x45,0x23,0x01,
1929 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1930 0x22,0x00,0x00,0x9c, // ldr q2, #4
1931 0x10,0x32,0x54,0x76,
1932 });
1933
Mike Kleine51632e2019-06-26 14:47:43 -04001934 test_asm(r, [&](A& a) {
1935 a.ldrq(A::v0, A::x8);
1936 a.strq(A::v0, A::x8);
1937 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001938 0x00,0x01,0xc0,0x3d,
1939 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001940 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001941
1942 test_asm(r, [&](A& a) {
Mike Klein8d78da92020-11-25 13:53:20 -06001943 a.dup4s (A::v0, A::x8);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001944 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
1945 a.ld1r8h (A::v0, A::x8);
1946 a.ld1r16b(A::v0, A::x8);
1947 },{
Mike Klein8d78da92020-11-25 13:53:20 -06001948 0x00,0x0d,0x04,0x4e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001949 0x00,0xc9,0x40,0x4d,
1950 0x00,0xc5,0x40,0x4d,
1951 0x00,0xc1,0x40,0x4d,
1952 });
1953
1954 test_asm(r, [&](A& a) {
Mike Klein1fa149a2019-07-01 11:18:08 -05001955 a.xtns2h(A::v0, A::v0);
1956 a.xtnh2b(A::v0, A::v0);
1957 a.strs (A::v0, A::x0);
1958
1959 a.ldrs (A::v0, A::x0);
1960 a.uxtlb2h(A::v0, A::v0);
1961 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001962
1963 a.uminv4s(A::v3, A::v4);
Mike Kleindbc19ea2020-11-18 13:32:14 -06001964 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
1965 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
1966 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
Mike Klein1fa149a2019-07-01 11:18:08 -05001967 },{
1968 0x00,0x28,0x61,0x0e,
1969 0x00,0x28,0x21,0x0e,
1970 0x00,0x00,0x00,0xbd,
1971
1972 0x00,0x00,0x40,0xbd,
1973 0x00,0xa4,0x08,0x2f,
1974 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001975
1976 0x83,0xa8,0xb1,0x6e,
Mike Kleindbc19ea2020-11-18 13:32:14 -06001977 0x83,0x3c,0x04,0x0e,
1978 0x83,0x3c,0x0c,0x0e,
1979 0x64,0x1c,0x1c,0x4e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001980 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001981
1982 test_asm(r, [&](A& a) {
1983 a.ldrb(A::v0, A::x8);
1984 a.strb(A::v0, A::x8);
1985 },{
1986 0x00,0x01,0x40,0x3d,
1987 0x00,0x01,0x00,0x3d,
1988 });
Mike Klein81d52672019-07-30 11:11:09 -05001989
1990 test_asm(r, [&](A& a) {
Mike Kleindbc19ea2020-11-18 13:32:14 -06001991 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
1992 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
1993 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
1994 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
1995 },{
1996 0x20,0x0c,0x40,0xf9,
1997 0x20,0x0c,0x40,0xb9,
1998 0x20,0x0c,0x40,0x79,
1999 0x20,0x0c,0x40,0x39,
2000 });
2001
2002 test_asm(r, [&](A& a) {
Mike Kleinf5097db2020-12-03 09:21:00 -06002003 a.tbl (A::v0, A::v1, A::v2);
Mike Kleinc7bca522020-12-03 10:01:29 -06002004 a.uzp14s(A::v0, A::v1, A::v2);
2005 a.uzp24s(A::v0, A::v1, A::v2);
Mike Kleinf5097db2020-12-03 09:21:00 -06002006 a.zip14s(A::v0, A::v1, A::v2);
2007 a.zip24s(A::v0, A::v1, A::v2);
Mike Klein81d52672019-07-30 11:11:09 -05002008 },{
2009 0x20,0x00,0x02,0x4e,
Mike Kleinc7bca522020-12-03 10:01:29 -06002010 0x20,0x18,0x82,0x4e,
2011 0x20,0x58,0x82,0x4e,
Mike Kleinf5097db2020-12-03 09:21:00 -06002012 0x20,0x38,0x82,0x4e,
2013 0x20,0x78,0x82,0x4e,
Mike Klein81d52672019-07-30 11:11:09 -05002014 });
Mike Klein05642042019-06-18 12:16:06 -05002015}
Mike Reedbcb46c02020-03-23 17:51:01 -04002016
2017DEF_TEST(SkVM_approx_math, r) {
2018 auto eval = [](int N, float values[], auto fn) {
2019 skvm::Builder b;
2020 skvm::Arg inout = b.varying<float>();
2021
2022 b.storeF(inout, fn(&b, b.loadF(inout)));
2023
2024 b.done().eval(N, values);
2025 };
2026
2027 auto compare = [r](int N, const float values[], const float expected[]) {
2028 for (int i = 0; i < N; ++i) {
2029 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
2030 }
2031 };
2032
2033 // log2
2034 {
2035 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
2036 constexpr int N = SK_ARRAY_COUNT(values);
2037 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2038 return b->approx_log2(v);
2039 });
2040 const float expected[] = {-2, -1, 0, 1, 2, 3};
2041 compare(N, values, expected);
2042 }
2043
2044 // pow2
2045 {
2046 float values[] = {-2, -1, 0, 1, 2, 3};
2047 constexpr int N = SK_ARRAY_COUNT(values);
2048 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2049 return b->approx_pow2(v);
2050 });
2051 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
2052 compare(N, values, expected);
2053 }
2054
2055 // powf -- x^0.5
2056 {
2057 float bases[] = {0, 1, 4, 9, 16};
2058 constexpr int N = SK_ARRAY_COUNT(bases);
2059 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2060 return b->approx_powf(base, b->splat(0.5f));
2061 });
2062 const float expected[] = {0, 1, 2, 3, 4};
2063 compare(N, bases, expected);
2064 }
2065 // powf -- 3^x
2066 {
2067 float exps[] = {-2, -1, 0, 1, 2};
2068 constexpr int N = SK_ARRAY_COUNT(exps);
2069 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2070 return b->approx_powf(b->splat(3.0f), exp);
2071 });
2072 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
2073 compare(N, exps, expected);
2074 }
Mike Reed82ff25e2020-04-07 13:51:41 -04002075
Mike Reedd468a162020-04-11 14:14:00 -04002076 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04002077 skvm::Builder b;
2078 skvm::Arg inout = b.varying<float>();
2079 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04002080 float actual = arg;
2081 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04002082
Mike Reedd468a162020-04-11 14:14:00 -04002083 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04002084
2085 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04002086 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04002087 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04002088 }
Mike Reed1b84ef22020-04-13 17:56:24 -04002089 return err;
2090 };
2091
2092 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2093 skvm::Builder b;
2094 skvm::Arg in0 = b.varying<float>();
2095 skvm::Arg in1 = b.varying<float>();
2096 skvm::Arg out = b.varying<float>();
2097 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2098 float actual;
2099 b.done().eval(1, &arg0, &arg1, &actual);
2100
2101 float err = std::abs(actual - expected);
2102
2103 if (err > tolerance) {
2104 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2105 REPORTER_ASSERT(r, true);
2106 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002107 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002108 };
2109
Mike Reed801ba0d2020-04-10 12:37:36 -04002110 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002111 {
2112 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002113 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002114 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2115 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2116 return approx_sin(x);
2117 });
2118 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2119 return approx_cos(x);
2120 });
2121 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002122
2123 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2124 // so bring in the domain a little.
2125 constexpr float eps = 0.16f;
2126 float err = 0;
2127 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2128 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2129 return approx_tan(x);
2130 });
2131 // try again with some multiples of P, to check our periodicity
2132 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2133 return approx_tan(x + 3*P);
2134 });
2135 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2136 return approx_tan(x - 3*P);
2137 });
2138 }
Mike Reedd468a162020-04-11 14:14:00 -04002139 if (0) { SkDebugf("tan error %g\n", err); }
2140 }
2141
2142 // asin, acos, atan
2143 {
2144 constexpr float tol = 0.00175f;
2145 float err = 0;
2146 for (float x = -1; x <= 1; x += 1.0f/64) {
2147 err += test(x, asin(x), tol, [](skvm::F32 x) {
2148 return approx_asin(x);
2149 });
2150 test(x, acos(x), tol, [](skvm::F32 x) {
2151 return approx_acos(x);
2152 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002153 }
Mike Reedd468a162020-04-11 14:14:00 -04002154 if (0) { SkDebugf("asin error %g\n", err); }
2155
2156 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002157 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002158 err += test(x, atan(x), tol, [](skvm::F32 x) {
2159 return approx_atan(x);
2160 });
2161 }
2162 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002163
2164 for (float y = -3; y <= 3; y += 1) {
2165 for (float x = -3; x <= 3; x += 1) {
2166 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002167 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002168 });
2169 }
2170 }
2171 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002172 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002173}
Mike Klein210288f2020-04-08 11:31:07 -05002174
2175DEF_TEST(SkVM_min_max, r) {
2176 // min() and max() have subtle behavior when one argument is NaN and
2177 // the other isn't. It's not sound to blindly swap their arguments.
2178 //
2179 // All backends must behave like std::min() and std::max(), which are
2180 //
2181 // min(x,y) = y<x ? y : x
2182 // max(x,y) = x<y ? y : x
2183
2184 // ±NaN, ±0, ±1, ±inf
2185 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2186 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2187
2188 float f[8];
2189 memcpy(f, bits, sizeof(bits));
2190
2191 auto identical = [&](float x, float y) {
2192 uint32_t X,Y;
2193 memcpy(&X, &x, 4);
2194 memcpy(&Y, &y, 4);
2195 return X == Y;
2196 };
2197
2198 // Test min/max with non-constant x, non-constant y.
2199 // (Whether x and y are varying or uniform shouldn't make any difference.)
2200 {
2201 skvm::Builder b;
2202 {
2203 skvm::Arg src = b.varying<float>(),
2204 mn = b.varying<float>(),
2205 mx = b.varying<float>();
2206
2207 skvm::F32 x = b.loadF(src),
2208 y = b.uniformF(b.uniform(), 0);
2209
2210 b.storeF(mn, b.min(x,y));
2211 b.storeF(mx, b.max(x,y));
2212 }
2213
Mike Klein10fc1e62020-04-13 11:57:05 -05002214 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002215 float mn[8], mx[8];
2216 for (int i = 0; i < 8; i++) {
2217 // min() and max() everything with f[i].
2218 program.eval(8, f,mn,mx, &f[i]);
2219
2220 for (int j = 0; j < 8; j++) {
2221 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2222 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2223 }
2224 }
2225 });
2226 }
2227
2228 // Test each with constant on the right.
2229 for (int i = 0; i < 8; i++) {
2230 skvm::Builder b;
2231 {
2232 skvm::Arg src = b.varying<float>(),
2233 mn = b.varying<float>(),
2234 mx = b.varying<float>();
2235
2236 skvm::F32 x = b.loadF(src),
2237 y = b.splat(f[i]);
2238
2239 b.storeF(mn, b.min(x,y));
2240 b.storeF(mx, b.max(x,y));
2241 }
2242
Mike Klein10fc1e62020-04-13 11:57:05 -05002243 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002244 float mn[8], mx[8];
2245 program.eval(8, f,mn,mx);
2246 for (int j = 0; j < 8; j++) {
2247 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2248 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2249 }
2250 });
2251 }
2252
2253 // Test each with constant on the left.
2254 for (int i = 0; i < 8; i++) {
2255 skvm::Builder b;
2256 {
2257 skvm::Arg src = b.varying<float>(),
2258 mn = b.varying<float>(),
2259 mx = b.varying<float>();
2260
2261 skvm::F32 x = b.splat(f[i]),
2262 y = b.loadF(src);
2263
2264 b.storeF(mn, b.min(x,y));
2265 b.storeF(mx, b.max(x,y));
2266 }
2267
Mike Klein10fc1e62020-04-13 11:57:05 -05002268 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002269 float mn[8], mx[8];
2270 program.eval(8, f,mn,mx);
2271 for (int j = 0; j < 8; j++) {
2272 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2273 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2274 }
2275 });
2276 }
2277}
Mike Klein4d680cd2020-07-15 09:58:51 -05002278
2279DEF_TEST(SkVM_halfs, r) {
2280 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2281 0xc400,0xb800,0xbc00,0xc000};
2282 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2283 -4.0f,-0.5f,-1.0f,-2.0f};
2284 {
2285 skvm::Builder b;
2286 skvm::Arg src = b.varying<uint16_t>(),
2287 dst = b.varying<float>();
Mike Klein42d67a62020-12-01 10:14:55 -06002288 b.storeF(dst, b.from_fp16(b.load16(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002289
2290 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2291 float dst[8];
2292 program.eval(8, hs, dst);
2293 for (int i = 0; i < 8; i++) {
2294 REPORTER_ASSERT(r, dst[i] == fs[i]);
2295 }
2296 });
2297 }
2298 {
2299 skvm::Builder b;
2300 skvm::Arg src = b.varying<float>(),
2301 dst = b.varying<uint16_t>();
Mike Klein42d67a62020-12-01 10:14:55 -06002302 b.store16(dst, b.to_fp16(b.loadF(src)));
Mike Klein4d680cd2020-07-15 09:58:51 -05002303
2304 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2305 uint16_t dst[8];
2306 program.eval(8, fs, dst);
2307 for (int i = 0; i < 8; i++) {
2308 REPORTER_ASSERT(r, dst[i] == hs[i]);
2309 }
2310 });
2311 }
2312}
Mike Klein6732da02020-07-16 13:03:18 -05002313
2314DEF_TEST(SkVM_64bit, r) {
2315 uint32_t lo[65],
2316 hi[65];
2317 uint64_t wide[65];
2318 for (int i = 0; i < 65; i++) {
2319 lo[i] = 2*i+0;
2320 hi[i] = 2*i+1;
2321 wide[i] = ((uint64_t)lo[i] << 0)
2322 | ((uint64_t)hi[i] << 32);
2323 }
2324
2325 {
2326 skvm::Builder b;
2327 {
2328 skvm::Arg wide = b.varying<uint64_t>(),
2329 lo = b.varying<int>(),
2330 hi = b.varying<int>();
Mike Klein31367892020-07-30 08:19:12 -05002331 b.store32(lo, b.load64(wide, 0));
2332 b.store32(hi, b.load64(wide, 1));
Mike Klein6732da02020-07-16 13:03:18 -05002333 }
2334 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2335 uint32_t l[65], h[65];
2336 program.eval(65, wide,l,h);
2337 for (int i = 0; i < 65; i++) {
2338 REPORTER_ASSERT(r, l[i] == lo[i]);
2339 REPORTER_ASSERT(r, h[i] == hi[i]);
2340 }
2341 });
2342 }
2343
2344 {
2345 skvm::Builder b;
2346 {
2347 skvm::Arg wide = b.varying<uint64_t>(),
2348 lo = b.varying<int>(),
2349 hi = b.varying<int>();
2350 b.store64(wide, b.load32(lo), b.load32(hi));
2351 }
2352 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2353 uint64_t w[65];
2354 program.eval(65, w,lo,hi);
2355 for (int i = 0; i < 65; i++) {
2356 REPORTER_ASSERT(r, w[i] == wide[i]);
2357 }
2358 });
2359 }
2360}
Mike Kleine942b8c2020-07-21 10:17:14 -05002361
2362DEF_TEST(SkVM_is_NaN_is_finite, r) {
2363 skvm::Builder b;
2364 {
2365 skvm::Arg src = b.varying<float>(),
2366 nan = b.varying<int>(),
2367 fin = b.varying<int>();
2368 b.store32(nan, is_NaN (b.loadF(src)));
2369 b.store32(fin, is_finite(b.loadF(src)));
2370 }
2371 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2372 // ±NaN, ±0, ±1, ±inf
2373 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2374 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2375 uint32_t nan[8], fin[8];
2376 program.eval(8, bits, nan,fin);
2377
2378 for (int i = 0; i < 8; i++) {
2379 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2380 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2381 i == 4 || i == 5) ? 0xffffffff : 0));
2382 }
2383 });
2384}
Mike Klein0cfd5032020-07-28 11:08:27 -05002385
2386DEF_TEST(SkVM_args, r) {
2387 // Test we can handle at least six arguments.
2388 skvm::Builder b;
2389 {
2390 skvm::Arg dst = b.varying<float>(),
2391 A = b.varying<float>(),
2392 B = b.varying<float>(),
2393 C = b.varying<float>(),
2394 D = b.varying<float>(),
2395 E = b.varying<float>();
2396 storeF(dst, b.loadF(A)
2397 + b.loadF(B)
2398 + b.loadF(C)
2399 + b.loadF(D)
2400 + b.loadF(E));
2401 }
2402
2403 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2404 float dst[17],A[17],B[17],C[17],D[17],E[17];
2405 for (int i = 0; i < 17; i++) {
2406 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2407 }
2408 program.eval(17, dst,A,B,C,D,E);
2409 for (int i = 0; i < 17; i++) {
2410 REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2411 }
2412 });
2413}
Mike Klein9791e502020-09-15 12:43:38 -05002414
Mike Kleinee40ec62020-11-20 15:34:16 -06002415DEF_TEST(SkVM_badpack, r) {
2416 // Test case distilled from actual failing draw,
2417 // originally with a bad arm64 implementation of pack().
2418 skvm::Builder p;
2419 {
2420 skvm::Arg uniforms = p.uniform(),
2421 dst = p.varying<uint16_t>();
2422
Mike Klein5ec9c4e2020-12-01 10:43:46 -06002423 skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
Mike Kleinee40ec62020-11-20 15:34:16 -06002424 a = p.splat(0xf);
2425
2426 skvm::I32 _4444 = p.splat(0);
2427 _4444 = pack(_4444, r, 12);
2428 _4444 = pack(_4444, a, 0);
2429 store16(dst, _4444);
2430 }
2431
2432 test_jit_and_interpreter(p.done(), [&](const skvm::Program& program){
2433 const float uniforms[] = { 0.0f, 0.0f,
2434 1.0f, 0.0f, 0.0f, 1.0f };
2435
2436 uint16_t dst[17] = {0};
2437 program.eval(17, uniforms,dst);
2438 for (int i = 0; i < 17; i++) {
2439 REPORTER_ASSERT(r, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
2440 }
2441 });
2442}