blob: bc9853cf1fa6c78006fa10ee139be31ff6f39afb [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Klein10fc1e62020-04-13 11:57:05 -050036static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
Mike Klein10fc1e62020-04-13 11:57:05 -050037 if (program.hasJIT()) {
Mike Kleinb5a30762019-10-16 10:11:56 -050038 test((const skvm::Program&) program);
39 program.dropJIT();
40 }
Mike Klein10fc1e62020-04-13 11:57:05 -050041 test((const skvm::Program&) program);
Mike Kleinb5a30762019-10-16 10:11:56 -050042}
43
44
Mike Klein68c50d02019-05-29 12:57:54 -050045DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050046 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050047
48 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050049 for (int s = 0; s < 3; s++)
50 for (int d = 0; d < 3; d++) {
51 auto srcFmt = (Fmt)s,
52 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050053 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050054
Mike Klein267f5072019-06-03 16:27:46 -050055 buf.writeText(fmt_name(srcFmt));
56 buf.writeText(" over ");
57 buf.writeText(fmt_name(dstFmt));
58 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050059 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050060 }
Mike Klein68c50d02019-05-29 12:57:54 -050061
Mike Klein7b7077c2019-06-03 17:10:59 -050062 // Write the I32 Srcovers also.
63 {
Mike Kleinaab45b52019-07-02 15:39:23 -050064 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050065 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050066 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050067 }
Mike Klein7b7077c2019-06-03 17:10:59 -050068
Mike Kleinf9963112019-08-08 15:13:25 -040069 {
Mike Kleind48488b2019-10-22 12:27:58 -050070 // Demonstrate the value of program reordering.
71 skvm::Builder b;
72 skvm::Arg sp = b.varying<int>(),
73 dp = b.varying<int>();
74
75 skvm::I32 byte = b.splat(0xff);
76
77 skvm::I32 src = b.load32(sp),
78 sr = b.extract(src, 0, byte),
79 sg = b.extract(src, 8, byte),
80 sb = b.extract(src, 16, byte),
81 sa = b.extract(src, 24, byte);
82
83 skvm::I32 dst = b.load32(dp),
84 dr = b.extract(dst, 0, byte),
85 dg = b.extract(dst, 8, byte),
86 db = b.extract(dst, 16, byte),
87 da = b.extract(dst, 24, byte);
88
89 skvm::I32 R = b.add(sr, dr),
90 G = b.add(sg, dg),
91 B = b.add(sb, db),
92 A = b.add(sa, da);
93
94 skvm::I32 rg = b.pack(R, G, 8),
95 ba = b.pack(B, A, 8),
96 rgba = b.pack(rg, ba, 16);
97
98 b.store32(dp, rgba);
99
100 dump(b, &buf);
101 }
102
Mike Klein238105b2020-03-04 17:05:32 -0600103 // Our checked in dump expectations assume we have FMA support.
Mike Klein10fc1e62020-04-13 11:57:05 -0500104 if (skvm::fma_supported()) {
Ben Wagnere8ffb082020-05-04 10:50:08 -0400105 sk_sp<SkData> actual = buf.detachAsData();
106 bool writeActualAsNewExpectation = false;
Mike Klein238105b2020-03-04 17:05:32 -0600107 {
Mike Klein238105b2020-03-04 17:05:32 -0600108 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Ben Wagnere8ffb082020-05-04 10:50:08 -0400109 if (!expected) {
110 ERRORF(r, "Couldn't load SkVMTest.expected.");
111 writeActualAsNewExpectation = true;
Mike Klein267f5072019-06-03 16:27:46 -0500112
Ben Wagnere8ffb082020-05-04 10:50:08 -0400113 } else if (!expected->equals(actual.get())) {
114 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
Adlai Holler684838f2020-05-12 10:41:04 -0400115 (int)expected->size(), expected->data(),
116 (int)actual->size(), actual->data());
Ben Wagnere8ffb082020-05-04 10:50:08 -0400117 writeActualAsNewExpectation = true;
118 }
119 }
120 if (writeActualAsNewExpectation) {
121 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
122 if (out.isValid()) {
123 out.write(actual->data(), actual->size());
Mike Klein77163312019-06-04 13:35:32 -0500124 }
Mike Klein68c50d02019-05-29 12:57:54 -0500125 }
126 }
127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500129 uint32_t src[9];
130 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500131
Mike Klein10fc1e62020-04-13 11:57:05 -0500132 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500133 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
134 src[i] = 0xbb007733;
135 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500136 }
Mike Klein9977efa2019-07-15 12:22:36 -0500137
138 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
139
140 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
141
142 // dst is probably 0xff2dad72.
143 for (auto got : dst) {
144 auto want = expected;
145 for (int i = 0; i < 4; i++) {
146 uint8_t d = got & 0xff,
147 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500148 if (abs(d-w) >= 2) {
149 SkDebugf("d %02x, w %02x\n", d,w);
150 }
Mike Klein9977efa2019-07-15 12:22:36 -0500151 REPORTER_ASSERT(r, abs(d-w) < 2);
152 got >>= 8;
153 want >>= 8;
154 }
155 }
156 });
Mike Klein3f593792019-06-12 12:54:52 -0500157 };
Mike Klein68c50d02019-05-29 12:57:54 -0500158
Mike Klein37607d42019-07-18 10:17:28 -0500159 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
160 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500161
Mike Klein10fc1e62020-04-13 11:57:05 -0500162 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500163 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500164 uint32_t src[9];
165 uint8_t dst[SK_ARRAY_COUNT(src)];
166
167 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
168 src[i] = 0xbb007733;
169 dst[i] = 0x42;
170 }
171
172 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
173 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500174
175 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
176 SkGetPackedG32(over),
177 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500178 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500179
Mike Klein3f593792019-06-12 12:54:52 -0500180 for (auto got : dst) {
181 REPORTER_ASSERT(r, abs(got-want) < 3);
182 }
Mike Klein9977efa2019-07-15 12:22:36 -0500183 });
Mike Klein68c50d02019-05-29 12:57:54 -0500184
Mike Klein10fc1e62020-04-13 11:57:05 -0500185 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500186 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500187 uint8_t src[256],
188 dst[256];
189 for (int i = 0; i < 256; i++) {
190 src[i] = 255 - i;
191 dst[i] = i;
192 }
193
194 program.eval(256, src, dst);
195
196 for (int i = 0; i < 256; i++) {
197 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
198 SkPackARGB32( i, 0,0,0)));
199 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
200 }
Mike Klein9977efa2019-07-15 12:22:36 -0500201 });
Mike Klein68c50d02019-05-29 12:57:54 -0500202}
Mike Klein81756e42019-06-12 11:36:28 -0500203
Mike Klein7542ab52020-04-02 08:50:16 -0500204DEF_TEST(SkVM_eliminate_dead_code, r) {
205 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400206 {
Mike Klein7542ab52020-04-02 08:50:16 -0500207 skvm::Arg arg = b.varying<int>();
208 skvm::I32 l = b.load32(arg);
209 skvm::I32 a = b.add(l, l);
210 b.add(a, b.splat(7));
211 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400212
Mike Klein7542ab52020-04-02 08:50:16 -0500213 std::vector<skvm::Instruction> program = b.program();
214 REPORTER_ASSERT(r, program.size() == 4);
215
Mike Klein5b701e12020-04-02 10:34:24 -0500216 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500217 REPORTER_ASSERT(r, program.size() == 0);
218}
219
220DEF_TEST(SkVM_Usage, r) {
221 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400222 {
Mike Klein7542ab52020-04-02 08:50:16 -0500223 skvm::Arg arg = b.varying<int>(),
224 buf = b.varying<int>();
225 skvm::I32 l = b.load32(arg);
226 skvm::I32 a = b.add(l, l);
227 skvm::I32 s = b.add(a, b.splat(7));
228 b.store32(buf, s);
Herb Derbyf20400e2020-03-18 16:11:25 -0400229 }
Mike Klein7542ab52020-04-02 08:50:16 -0500230
Mike Kleinb7d87902020-04-02 10:14:35 -0500231 skvm::Usage usage{b.program()};
Mike Klein7542ab52020-04-02 08:50:16 -0500232 REPORTER_ASSERT(r, b.program()[0].op == skvm::Op::load32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500233 REPORTER_ASSERT(r, usage[0].size() == 2);
Mike Klein7542ab52020-04-02 08:50:16 -0500234 REPORTER_ASSERT(r, b.program()[1].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500235 REPORTER_ASSERT(r, usage[1].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500236 REPORTER_ASSERT(r, b.program()[2].op == skvm::Op::splat);
Mike Kleinb7d87902020-04-02 10:14:35 -0500237 REPORTER_ASSERT(r, usage[2].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500238 REPORTER_ASSERT(r, b.program()[3].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500239 REPORTER_ASSERT(r, usage[3].size() == 1);
Herb Derbyf20400e2020-03-18 16:11:25 -0400240}
241
Mike Klein9fdadb92019-07-30 12:30:13 -0500242DEF_TEST(SkVM_Pointless, r) {
243 // Let's build a program with no memory arguments.
244 // It should all be pegged as dead code, but we should be able to "run" it.
245 skvm::Builder b;
246 {
247 b.add(b.splat(5.0f),
248 b.splat(4.0f));
249 }
250
Mike Klein10fc1e62020-04-13 11:57:05 -0500251 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500252 for (int N = 0; N < 64; N++) {
253 program.eval(N);
254 }
255 });
256
Mike Kleined9b1f12020-02-06 13:02:32 -0600257 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500258 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500259 }
260}
261
Mike Klein10fc1e62020-04-13 11:57:05 -0500262DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600263 skvm::Builder b;
264 b.store32(b.varying<int>(), b.splat(42));
265
Mike Klein10fc1e62020-04-13 11:57:05 -0500266 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
267 int buf[18];
268 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -0600269
Mike Klein10fc1e62020-04-13 11:57:05 -0500270 p.eval(17, buf);
271 for (int i = 0; i < 17; i++) {
272 REPORTER_ASSERT(r, buf[i] == 42);
273 }
274 REPORTER_ASSERT(r, buf[17] == 47);
275 });
Mike Kleinb6149312020-02-26 13:04:23 -0600276}
Mike Klein11efa182020-02-27 12:04:37 -0600277
Mike Klein10fc1e62020-04-13 11:57:05 -0500278DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -0600279 skvm::Builder b;
280 {
281 auto src = b.varying<int>(),
282 dst = b.varying<int>();
283 b.store32(dst, b.load32(src));
284 }
285
Mike Klein10fc1e62020-04-13 11:57:05 -0500286 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
287 int src[] = {1,2,3,4,5,6,7,8,9},
288 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -0600289
Mike Klein10fc1e62020-04-13 11:57:05 -0500290 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
291 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
292 REPORTER_ASSERT(r, dst[i] == src[i]);
293 }
294 size_t i = SK_ARRAY_COUNT(src)-1;
295 REPORTER_ASSERT(r, dst[i] == 0);
296 });
Mike Klein11efa182020-02-27 12:04:37 -0600297}
Mike Kleinb6149312020-02-26 13:04:23 -0600298
Mike Klein81756e42019-06-12 11:36:28 -0500299DEF_TEST(SkVM_LoopCounts, r) {
300 // Make sure we cover all the exact N we want.
301
Mike Klein9977efa2019-07-15 12:22:36 -0500302 // buf[i] += 1
303 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500304 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500305 b.store32(arg,
306 b.add(b.splat(1),
307 b.load32(arg)));
308
Mike Klein10fc1e62020-04-13 11:57:05 -0500309 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500310 int buf[64];
311 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500312 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
313 buf[i] = i;
314 }
315 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500316
Mike Klein9977efa2019-07-15 12:22:36 -0500317 for (int i = 0; i < N; i++) {
318 REPORTER_ASSERT(r, buf[i] == i+1);
319 }
320 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
321 REPORTER_ASSERT(r, buf[i] == i);
322 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500323 }
324 });
Mike Klein81756e42019-06-12 11:36:28 -0500325}
Mike Klein05642042019-06-18 12:16:06 -0500326
Mike Kleinb2b6a992020-01-13 16:34:30 -0600327DEF_TEST(SkVM_gather32, r) {
328 skvm::Builder b;
329 {
330 skvm::Arg uniforms = b.uniform(),
331 buf = b.varying<int>();
332 skvm::I32 x = b.load32(buf);
333 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
334 }
335
Mike Klein10fc1e62020-04-13 11:57:05 -0500336 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600337 const int img[] = {12,34,56,78, 90,98,76,54};
338
339 int buf[20];
340 for (int i = 0; i < 20; i++) {
341 buf[i] = i;
342 }
343
344 struct Uniforms {
345 const int* img;
346 } uniforms{img};
347
348 program.eval(20, &uniforms, buf);
349 int i = 0;
350 REPORTER_ASSERT(r, buf[i] == 12); i++;
351 REPORTER_ASSERT(r, buf[i] == 34); i++;
352 REPORTER_ASSERT(r, buf[i] == 56); i++;
353 REPORTER_ASSERT(r, buf[i] == 78); i++;
354 REPORTER_ASSERT(r, buf[i] == 90); i++;
355 REPORTER_ASSERT(r, buf[i] == 98); i++;
356 REPORTER_ASSERT(r, buf[i] == 76); i++;
357 REPORTER_ASSERT(r, buf[i] == 54); i++;
358
359 REPORTER_ASSERT(r, buf[i] == 12); i++;
360 REPORTER_ASSERT(r, buf[i] == 34); i++;
361 REPORTER_ASSERT(r, buf[i] == 56); i++;
362 REPORTER_ASSERT(r, buf[i] == 78); i++;
363 REPORTER_ASSERT(r, buf[i] == 90); i++;
364 REPORTER_ASSERT(r, buf[i] == 98); i++;
365 REPORTER_ASSERT(r, buf[i] == 76); i++;
366 REPORTER_ASSERT(r, buf[i] == 54); i++;
367
368 REPORTER_ASSERT(r, buf[i] == 12); i++;
369 REPORTER_ASSERT(r, buf[i] == 34); i++;
370 REPORTER_ASSERT(r, buf[i] == 56); i++;
371 REPORTER_ASSERT(r, buf[i] == 78); i++;
372 });
373}
374
Mike Klein81d52672019-07-30 11:11:09 -0500375DEF_TEST(SkVM_gathers, r) {
376 skvm::Builder b;
377 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600378 skvm::Arg uniforms = b.uniform(),
379 buf32 = b.varying<int>(),
380 buf16 = b.varying<uint16_t>(),
381 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500382
383 skvm::I32 x = b.load32(buf32);
384
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600385 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
386 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
387 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500388 }
389
Mike Klein10fc1e62020-04-13 11:57:05 -0500390 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500391 const int img[] = {12,34,56,78, 90,98,76,54};
392
393 constexpr int N = 20;
394 int buf32[N];
395 uint16_t buf16[N];
396 uint8_t buf8 [N];
397
398 for (int i = 0; i < 20; i++) {
399 buf32[i] = i;
400 }
401
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600402 struct Uniforms {
403 const int* img;
404 } uniforms{img};
405
406 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500407 int i = 0;
408 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
409 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
410 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
411 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
412 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
413 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
414 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
415 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
416
417 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
418 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
419 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
420 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
421 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
422 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
423 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
424 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
425
426 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
427 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
428 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
429 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
430 });
431}
432
Mike Klein21e85eb2020-04-17 13:57:13 -0500433DEF_TEST(SkVM_gathers2, r) {
434 skvm::Builder b;
435 {
436 skvm::Arg uniforms = b.uniform(),
437 buf32 = b.varying<int>(),
438 buf16 = b.varying<uint16_t>(),
439 buf8 = b.varying<uint8_t>();
440
441 skvm::I32 x = b.load32(buf32);
442
443 b.store32(buf32, b.gather32(uniforms,0, x));
444 b.store16(buf16, b.gather16(uniforms,0, x));
445 b.store8 (buf8 , b.gather8 (uniforms,0, x));
446 }
447
448 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
449 uint8_t img[256];
450 for (int i = 0; i < 256; i++) {
451 img[i] = i;
452 }
453
454 int buf32[64];
455 uint16_t buf16[64];
456 uint8_t buf8 [64];
457
458 for (int i = 0; i < 64; i++) {
459 buf32[i] = (i*47)&63;
460 buf16[i] = 0;
461 buf8 [i] = 0;
462 }
463
464 struct Uniforms {
465 const uint8_t* img;
466 } uniforms{img};
467
468 program.eval(64, &uniforms, buf32, buf16, buf8);
469
470 for (int i = 0; i < 64; i++) {
471 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
472 }
473
474 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
475 REPORTER_ASSERT(r, buf16[63] == 0x2322);
476
477 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
478 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
479 });
480}
481
Mike Klein81d52672019-07-30 11:11:09 -0500482DEF_TEST(SkVM_bitops, r) {
483 skvm::Builder b;
484 {
485 skvm::Arg ptr = b.varying<int>();
486
487 skvm::I32 x = b.load32(ptr);
488
Mike Klein4067a942020-04-05 10:25:32 -0500489 x = b.bit_and (x, b.splat(0xf1)); // 0x40
490 x = b.bit_or (x, b.splat(0x80)); // 0xc0
491 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
492 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500493
494 x = b.shl(x, 28); // 0xe000'0000
495 x = b.sra(x, 28); // 0xffff'fffe
496 x = b.shr(x, 1); // 0x7fff'ffff
497
498 b.store32(ptr, x);
499 }
500
Mike Klein10fc1e62020-04-13 11:57:05 -0500501 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500502 int x = 0x42;
503 program.eval(1, &x);
504 REPORTER_ASSERT(r, x == 0x7fff'ffff);
505 });
506}
507
Mike Klein4067a942020-04-05 10:25:32 -0500508DEF_TEST(SkVM_select_is_NaN, r) {
509 skvm::Builder b;
510 {
511 skvm::Arg src = b.varying<float>(),
512 dst = b.varying<float>();
513
514 skvm::F32 x = b.loadF(src);
515 x = select(is_NaN(x), b.splat(0.0f)
516 , x);
517 b.storeF(dst, x);
518 }
519
520 std::vector<skvm::OptimizedInstruction> program = b.optimize();
521 REPORTER_ASSERT(r, program.size() == 4);
522 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
523 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
524 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
525 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
526
Mike Klein10fc1e62020-04-13 11:57:05 -0500527 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500528 // ±NaN, ±0, ±1, ±inf
529 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
530 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
531 uint32_t dst[SK_ARRAY_COUNT(src)];
532 program.eval(SK_ARRAY_COUNT(src), src, dst);
533
534 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
535 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
536 }
537 });
538}
539
Mike Klein81d52672019-07-30 11:11:09 -0500540DEF_TEST(SkVM_f32, r) {
541 skvm::Builder b;
542 {
543 skvm::Arg arg = b.varying<float>();
544
Mike Reedf5ff4c22020-03-23 14:57:53 -0400545 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500546 y = b.add(x,x), // y = 2x
547 z = b.sub(y,x), // z = 2x-x = x
548 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400549 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500550 }
551
Mike Klein10fc1e62020-04-13 11:57:05 -0500552 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500553 float buf[] = { 1,2,3,4,5,6,7,8,9 };
554 program.eval(SK_ARRAY_COUNT(buf), buf);
555 for (float v : buf) {
556 REPORTER_ASSERT(r, v == 1.0f);
557 }
558 });
559}
560
561DEF_TEST(SkVM_cmp_i32, r) {
562 skvm::Builder b;
563 {
564 skvm::I32 x = b.load32(b.varying<int>());
565
566 auto to_bit = [&](int shift, skvm::I32 mask) {
567 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
568 };
569
570 skvm::I32 m = b.splat(0);
571 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
572 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
573 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
574 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
575 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
576 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
577
578 b.store32(b.varying<int>(), m);
579 }
Mike Klein10fc1e62020-04-13 11:57:05 -0500580 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500581 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
582 int out[SK_ARRAY_COUNT(in)];
583
584 program.eval(SK_ARRAY_COUNT(in), in, out);
585
586 REPORTER_ASSERT(r, out[0] == 0b001111);
587 REPORTER_ASSERT(r, out[1] == 0b001100);
588 REPORTER_ASSERT(r, out[2] == 0b001010);
589 REPORTER_ASSERT(r, out[3] == 0b001010);
590 REPORTER_ASSERT(r, out[4] == 0b000010);
591 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
592 REPORTER_ASSERT(r, out[i] == 0b110010);
593 }
594 });
595}
596
597DEF_TEST(SkVM_cmp_f32, r) {
598 skvm::Builder b;
599 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400600 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500601
602 auto to_bit = [&](int shift, skvm::I32 mask) {
603 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
604 };
605
606 skvm::I32 m = b.splat(0);
607 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
608 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
609 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
610 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
611 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
612 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
613
614 b.store32(b.varying<int>(), m);
615 }
616
Mike Klein10fc1e62020-04-13 11:57:05 -0500617 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500618 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
619 int out[SK_ARRAY_COUNT(in)];
620
621 program.eval(SK_ARRAY_COUNT(in), in, out);
622
623 REPORTER_ASSERT(r, out[0] == 0b001111);
624 REPORTER_ASSERT(r, out[1] == 0b001100);
625 REPORTER_ASSERT(r, out[2] == 0b001010);
626 REPORTER_ASSERT(r, out[3] == 0b001010);
627 REPORTER_ASSERT(r, out[4] == 0b000010);
628 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
629 REPORTER_ASSERT(r, out[i] == 0b110010);
630 }
631 });
632}
633
Mike Klein14548b92020-02-28 14:02:29 -0600634DEF_TEST(SkVM_index, r) {
635 skvm::Builder b;
636 b.store32(b.varying<int>(), b.index());
637
Mike Klein10fc1e62020-04-13 11:57:05 -0500638 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600639 int buf[23];
640 program.eval(SK_ARRAY_COUNT(buf), buf);
641 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
642 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
643 }
644 });
645}
646
Mike Klein4a131192019-07-19 13:56:41 -0500647DEF_TEST(SkVM_mad, r) {
648 // This program is designed to exercise the tricky corners of instruction
649 // and register selection for Op::mad_f32.
650
651 skvm::Builder b;
652 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500653 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500654
655 skvm::F32 x = b.to_f32(b.load32(arg)),
656 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
657 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
658 w = b.mad(z,z,y), // w can alias z but not y.
659 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600660 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500661 }
662
Mike Klein10fc1e62020-04-13 11:57:05 -0500663 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500664 int x = 2;
665 program.eval(1, &x);
666 // x = 2
667 // y = 2*2 + 2 = 6
668 // z = 6*6 + 2 = 38
669 // w = 38*38 + 6 = 1450
670 // v = 1450*6 + 1450 = 10150
671 REPORTER_ASSERT(r, x == 10150);
672 });
673}
674
Mike Klein7c0332c2020-03-05 14:18:04 -0600675DEF_TEST(SkVM_fms, r) {
676 // Create a pattern that can be peepholed into an Op::fms_f32.
677 skvm::Builder b;
678 {
679 skvm::Arg arg = b.varying<int>();
680
681 skvm::F32 x = b.to_f32(b.load32(arg)),
682 v = b.sub(b.mul(x, b.splat(2.0f)),
683 b.splat(1.0f));
684 b.store32(arg, b.trunc(v));
685 }
686
Mike Klein10fc1e62020-04-13 11:57:05 -0500687 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600688 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
689 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
690
691 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
692 REPORTER_ASSERT(r, buf[i] = 2*i-1);
693 }
694 });
695}
696
697DEF_TEST(SkVM_fnma, r) {
698 // Create a pattern that can be peepholed into an Op::fnma_f32.
699 skvm::Builder b;
700 {
701 skvm::Arg arg = b.varying<int>();
702
703 skvm::F32 x = b.to_f32(b.load32(arg)),
704 v = b.sub(b.splat(1.0f),
705 b.mul(x, b.splat(2.0f)));
706 b.store32(arg, b.trunc(v));
707 }
708
Mike Klein10fc1e62020-04-13 11:57:05 -0500709 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600710 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
711 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
712
713 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
714 REPORTER_ASSERT(r, buf[i] = 1-2*i);
715 }
716 });
717}
718
Mike Klein81d52672019-07-30 11:11:09 -0500719DEF_TEST(SkVM_madder, r) {
720 skvm::Builder b;
721 {
722 skvm::Arg arg = b.varying<float>();
723
Mike Reedf5ff4c22020-03-23 14:57:53 -0400724 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500725 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
726 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
727 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400728 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500729 }
730
Mike Klein10fc1e62020-04-13 11:57:05 -0500731 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500732 float x = 2.0f;
733 // y = 2*2 + 2 = 6
734 // z = 6*2 + 6 = 18
735 // w = 6*6 + 18 = 54
736 program.eval(1, &x);
737 REPORTER_ASSERT(r, x == 54.0f);
738 });
739}
740
Mike Kleinf22faaf2020-01-09 07:27:39 -0600741DEF_TEST(SkVM_floor, r) {
742 skvm::Builder b;
743 {
744 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400745 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600746 }
747
Mike Klein10fc1e62020-04-13 11:57:05 -0500748 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600749 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
750 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
751 program.eval(SK_ARRAY_COUNT(buf), buf);
752 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
753 REPORTER_ASSERT(r, buf[i] == want[i]);
754 }
755 });
756}
757
Mike Klein5caf7de2020-03-12 11:05:46 -0500758DEF_TEST(SkVM_round, r) {
759 skvm::Builder b;
760 {
761 skvm::Arg src = b.varying<float>();
762 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400763 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500764 }
765
766 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
767 // We haven't explicitly guaranteed that here... it just probably is.
Mike Klein10fc1e62020-04-13 11:57:05 -0500768 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500769 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
770 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
771 int dst[SK_ARRAY_COUNT(buf)];
772
773 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
774 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
775 REPORTER_ASSERT(r, dst[i] == want[i]);
776 }
777 });
778}
779
Herb Derbyc02a41f2020-02-28 14:25:45 -0600780DEF_TEST(SkVM_min, r) {
781 skvm::Builder b;
782 {
783 skvm::Arg src1 = b.varying<float>();
784 skvm::Arg src2 = b.varying<float>();
785 skvm::Arg dst = b.varying<float>();
786
Mike Reedf5ff4c22020-03-23 14:57:53 -0400787 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600788 }
789
Mike Klein10fc1e62020-04-13 11:57:05 -0500790 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600791 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
792 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
793 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
794 float d[SK_ARRAY_COUNT(s1)];
795 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
796 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
797 REPORTER_ASSERT(r, d[i] == want[i]);
798 }
799 });
800}
801
802DEF_TEST(SkVM_max, r) {
803 skvm::Builder b;
804 {
805 skvm::Arg src1 = b.varying<float>();
806 skvm::Arg src2 = b.varying<float>();
807 skvm::Arg dst = b.varying<float>();
808
Mike Reedf5ff4c22020-03-23 14:57:53 -0400809 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600810 }
811
Mike Klein10fc1e62020-04-13 11:57:05 -0500812 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600813 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
814 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
815 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
816 float d[SK_ARRAY_COUNT(s1)];
817 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
818 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
819 REPORTER_ASSERT(r, d[i] == want[i]);
820 }
821 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600822}
823
Mike Kleinf98d0d32019-07-22 14:30:18 -0500824DEF_TEST(SkVM_hoist, r) {
825 // This program uses enough constants that it will fail to JIT if we hoist them.
826 // The JIT will try again without hoisting, and that'll just need 2 registers.
827 skvm::Builder b;
828 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500829 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500830 skvm::I32 x = b.load32(arg);
831 for (int i = 0; i < 32; i++) {
832 x = b.add(x, b.splat(i));
833 }
834 b.store32(arg, x);
835 }
836
Mike Klein10fc1e62020-04-13 11:57:05 -0500837 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500838 int x = 4;
839 program.eval(1, &x);
840 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
841 // x += 496
842 REPORTER_ASSERT(r, x == 500);
843 });
844}
845
Mike Kleinb9944122019-08-02 12:22:39 -0500846DEF_TEST(SkVM_select, r) {
847 skvm::Builder b;
848 {
849 skvm::Arg buf = b.varying<int>();
850
851 skvm::I32 x = b.load32(buf);
852
853 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
854
855 b.store32(buf, x);
856 }
857
Mike Klein10fc1e62020-04-13 11:57:05 -0500858 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500859 int buf[] = { 0,1,2,3,4,5,6,7,8 };
860 program.eval(SK_ARRAY_COUNT(buf), buf);
861 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
862 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
863 }
864 });
865}
866
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500867DEF_TEST(SkVM_NewOps, r) {
868 // Exercise a somewhat arbitrary set of new ops.
869 skvm::Builder b;
870 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500871 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500872 uniforms = b.uniform();
873
874 skvm::I32 x = b.load16(buf);
875
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600876 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500877
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600878 x = b.add(x, b.uniform32(uniforms, kPtr+0));
879 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
880 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
881
882 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500883 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
884 x = b.select(b.gt(x, limit ), limit , x);
885
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600886 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500887
888 b.store16(buf, x);
889 }
890
891 if ((false)) {
892 SkDynamicMemoryWStream buf;
893 dump(b, &buf);
894 sk_sp<SkData> blob = buf.detachAsData();
895 SkDebugf("%.*s\n", blob->size(), blob->data());
896 }
897
Mike Klein10fc1e62020-04-13 11:57:05 -0500898 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500899 const int N = 31;
900 int16_t buf[N];
901 for (int i = 0; i < N; i++) {
902 buf[i] = i;
903 }
904
905 const int M = 16;
906 uint8_t img[M];
907 for (int i = 0; i < M; i++) {
908 img[i] = i*i;
909 }
910
911 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600912 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500913 int add = 5;
914 uint8_t mul = 3;
915 uint16_t sub = 18;
916 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600917 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500918
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600919 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500920
921 for (int i = 0; i < N; i++) {
922 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
923 int x = 3*(i-1);
924
925 // Then that's pinned to the limits of img.
926 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
927 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
928 REPORTER_ASSERT(r, buf[i] == img[x]);
929 }
930 });
931}
932
Mike Klein5a8404c2020-02-28 14:24:56 -0600933DEF_TEST(SkVM_sqrt, r) {
934 skvm::Builder b;
935 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400936 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600937
Mike Klein10fc1e62020-04-13 11:57:05 -0500938 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600939 constexpr int K = 17;
940 float buf[K];
941 for (int i = 0; i < K; i++) {
942 buf[i] = (float)(i*i);
943 }
944
945 // x^2 -> x
946 program.eval(K, buf);
947
948 for (int i = 0; i < K; i++) {
949 REPORTER_ASSERT(r, buf[i] == (float)i);
950 }
951 });
952}
953
Mike Klein3f7c8652019-11-07 10:33:56 -0600954DEF_TEST(SkVM_MSAN, r) {
955 // This little memset32() program should be able to JIT, but if we run that
956 // JIT code in an MSAN build, it won't see the writes initialize buf. So
957 // this tests that we're using the interpreter instead.
958 skvm::Builder b;
959 b.store32(b.varying<int>(), b.splat(42));
960
Mike Klein10fc1e62020-04-13 11:57:05 -0500961 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600962 constexpr int K = 17;
963 int buf[K]; // Intentionally uninitialized.
964 program.eval(K, buf);
965 sk_msan_assert_initialized(buf, buf+K);
966 for (int x : buf) {
967 REPORTER_ASSERT(r, x == 42);
968 }
969 });
970}
971
Mike Klein13601172019-11-08 15:01:02 -0600972DEF_TEST(SkVM_assert, r) {
973 skvm::Builder b;
974 b.assert_true(b.lt(b.load32(b.varying<int>()),
975 b.splat(42)));
976
Mike Klein10fc1e62020-04-13 11:57:05 -0500977 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600978 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600979 program.eval(SK_ARRAY_COUNT(buf), buf);
980 });
981}
982
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600983DEF_TEST(SkVM_premul, reporter) {
984 // Test that premul is short-circuited when alpha is known opaque.
985 {
986 skvm::Builder p;
987 auto rptr = p.varying<int>(),
988 aptr = p.varying<int>();
989
Mike Reedf5ff4c22020-03-23 14:57:53 -0400990 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600991 g = p.splat(0.0f),
992 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400993 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600994
995 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400996 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600997
998 // load red, load alpha, red *= alpha, store red
999 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
1000 }
1001
1002 {
1003 skvm::Builder p;
1004 auto rptr = p.varying<int>();
1005
Mike Reedf5ff4c22020-03-23 14:57:53 -04001006 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001007 g = p.splat(0.0f),
1008 b = p.splat(0.0f),
1009 a = p.splat(1.0f);
1010
1011 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001012 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001013
1014 // load red, store red
1015 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1016 }
1017
1018 // Same deal for unpremul.
1019 {
1020 skvm::Builder p;
1021 auto rptr = p.varying<int>(),
1022 aptr = p.varying<int>();
1023
Mike Reedf5ff4c22020-03-23 14:57:53 -04001024 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001025 g = p.splat(0.0f),
1026 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001027 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001028
1029 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001030 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001031
1032 // load red, load alpha, a bunch of unpremul instructions, store red
1033 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1034 }
1035
1036 {
1037 skvm::Builder p;
1038 auto rptr = p.varying<int>();
1039
Mike Reedf5ff4c22020-03-23 14:57:53 -04001040 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001041 g = p.splat(0.0f),
1042 b = p.splat(0.0f),
1043 a = p.splat(1.0f);
1044
1045 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001046 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001047
1048 // load red, store red
1049 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1050 }
1051}
Mike Klein05642042019-06-18 12:16:06 -05001052
Mike Klein05642042019-06-18 12:16:06 -05001053template <typename Fn>
1054static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001055 uint8_t buf[4096];
1056 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001057 fn(a);
1058
1059 REPORTER_ASSERT(r, a.size() == expected.size());
1060
Mike Klein88c0a902019-06-24 15:34:02 -04001061 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001062 want = expected.begin();
1063 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001064 REPORTER_ASSERT(r, got[i] == want[i],
1065 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001066 }
1067}
1068
1069DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001070 // Easiest way to generate test cases is
1071 //
1072 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1073 //
1074 // The -x86-asm-syntax=intel bit is optional, controlling the
1075 // input syntax only; the output will always be AT&T op x,y,dst style.
1076 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1077 // that a bit easier to use here, despite maybe favoring AT&T overall.
1078
1079 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001080 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001081 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001082 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001083 a.vzeroupper();
1084 a.ret();
1085 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001086 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001087 0xc5, 0xf8, 0x77,
1088 0xc3,
1089 });
1090
Mike Klein237dbb42019-07-19 09:44:47 -05001091 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001092 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001093 a.ret();
1094 a.align(4);
1095 },{
1096 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001097 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001098 });
Mike Klein61703a62019-06-18 15:01:12 -05001099
Mike Klein397fc882019-06-20 11:37:10 -05001100 test_asm(r, [&](A& a) {
1101 a.add(A::rax, 8); // Always good to test rax.
1102 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001103
Mike Klein397fc882019-06-20 11:37:10 -05001104 a.add(A::rdi, 12); // Last 0x48 REX
1105 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001106
Mike Klein86a645c2019-07-12 12:29:39 -05001107 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001108 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001109
Mike Klein397fc882019-06-20 11:37:10 -05001110 a.add(A::rsi, 128); // Requires 4 byte immediate.
1111 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001112
1113 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1114 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1115 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
Mike Klein68d075e2020-07-28 09:26:51 -05001116 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
Mike Kleinc15c9362020-04-16 11:10:36 -05001117 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
Mike Klein68d075e2020-07-28 09:26:51 -05001118 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
1119 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
Mike Kleinc15c9362020-04-16 11:10:36 -05001120 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1121 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1122 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1123
1124 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1125
1126 a.add( A::rax , A::rcx); // addq %rcx, %rax
1127 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1128 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1129 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1130
1131 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001132 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001133 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001134 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001135
1136 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001137 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001138
Mike Klein86a645c2019-07-12 12:29:39 -05001139 0x49, 0x83, 0b11'000'000, 0x07,
1140 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001141
1142 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001143 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001144
1145 0x48,0x83,0x06,0x07,
1146 0x48,0x83,0x46,0x0c,0x07,
1147 0x48,0x83,0x44,0x24,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001148 0x49,0x83,0x44,0x24,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001149 0x48,0x83,0x44,0x84,0x0c,0x07,
Mike Klein68d075e2020-07-28 09:26:51 -05001150 0x49,0x83,0x44,0x84,0x0c,0x07,
1151 0x4a,0x83,0x44,0xa0,0x0c,0x07,
Mike Kleinc15c9362020-04-16 11:10:36 -05001152 0x4b,0x83,0x44,0x43,0x0c,0x07,
1153 0x49,0x83,0x44,0x03,0x0c,0x07,
1154 0x4a,0x83,0x44,0x18,0x0c,0x07,
1155
1156 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1157
1158 0x48,0x01,0xc8,
1159 0x48,0x01,0x08,
1160 0x48,0x01,0x48,0x0c,
1161 0x48,0x03,0x48,0x0c,
1162 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001163 });
Mike Klein397fc882019-06-20 11:37:10 -05001164
1165
1166 test_asm(r, [&](A& a) {
1167 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1168 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1169 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1170 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1171 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1172 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1173 },{
1174 /* VEX */ /*op*/ /*modRM*/
1175 0xc5, 0xf5, 0xfe, 0xc2,
1176 0xc5, 0x75, 0xfe, 0xc2,
1177 0xc5, 0xbd, 0xfe, 0xc2,
1178 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1179 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1180 0xc5, 0xf5, 0xfa, 0xc2,
1181 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001182
1183 test_asm(r, [&](A& a) {
Mike Klein84dd8f92020-09-15 07:57:27 -05001184 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1185 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1186 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1187 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1188
1189 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1190 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1191 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1192 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1193
1194 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1195 a.vpabsw (A::ymm4, A::ymm3);
1196 a.vpsllw (A::ymm4, A::ymm3, 12);
1197 a.vpsraw (A::ymm4, A::ymm3, 12);
1198 },{
1199 0xc5, 0xe5, 0xfd, 0xe2,
1200 0xc5, 0xe5, 0xe3, 0xe2,
1201 0xc5, 0xe5, 0x75, 0xe2,
1202 0xc5, 0xe5, 0x65, 0xe2,
1203
1204 0xc5, 0xe5, 0xea, 0xe2,
1205 0xc5, 0xe5, 0xee, 0xe2,
1206 0xc4,0xe2,0x65, 0x3a, 0xe2,
1207 0xc4,0xe2,0x65, 0x3e, 0xe2,
1208
1209 0xc4,0xe2,0x65, 0x0b, 0xe2,
1210 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1211 0xc5,0xdd,0x71, 0xf3, 0x0c,
1212 0xc5,0xdd,0x71, 0xe3, 0x0c,
1213 });
1214
1215 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001216 A::Label l;
1217 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001218 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1219 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1220 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1221 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1222 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1223 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001224 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001225 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001226 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001227 0xc5,0xf5,0x76,0xc2,
1228 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001229 0xc5,0xf4,0xc2,0xc2,0x00,
1230 0xc5,0xf4,0xc2,0xc2,0x01,
1231 0xc5,0xf4,0xc2,0xc2,0x02,
1232 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001233 });
1234
1235 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001236 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1237 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1238 },{
1239 0xc5,0xf4,0x5d,0xc2,
1240 0xc5,0xf4,0x5f,0xc2,
1241 });
1242
1243 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001244 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1245 },{
1246 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1247 });
1248
1249 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001250 a.vpsrld(A::ymm15, A::ymm2, 8);
1251 a.vpsrld(A::ymm0 , A::ymm8, 5);
1252 },{
1253 0xc5, 0x85, 0x72,0xd2, 0x08,
1254 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1255 });
1256
1257 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001258 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001259 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001260 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001261 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001262 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001263 },{
Mike Klein184f6012020-07-22 13:17:29 -05001264 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001265 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001266 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1267 });
Mike Kleine5053412019-06-21 12:37:22 -05001268
1269 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001270 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1271 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1272 },{
1273 0xc5,0xed,0x62,0x0f,
1274 0xc5,0xed,0x6a,0xcb,
1275 });
1276
1277 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001278 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1279 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1280 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1281 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1282 },{
1283 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1284 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1285 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1286 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1287 });
1288
1289 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001290 A::Label l;
1291 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001292 a.byte(1);
1293 a.byte(2);
1294 a.byte(3);
1295 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001296
Mike Klein65c10b52019-07-12 09:22:21 -05001297 a.vbroadcastss(A::ymm0 , &l);
1298 a.vbroadcastss(A::ymm1 , &l);
1299 a.vbroadcastss(A::ymm8 , &l);
1300 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001301
Mike Klein65c10b52019-07-12 09:22:21 -05001302 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001303 a.vpaddd (A::ymm4, A::ymm3, &l);
1304 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001305
1306 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001307
1308 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001309 },{
1310 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001311
Mike Kleine5053412019-06-21 12:37:22 -05001312 /* VEX */ /*op*/ /* ModRM */ /* offset */
1313 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1314 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1315 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1316 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001317
1318 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001319
1320 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1321 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001322
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001323 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1324
1325 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001326 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001327
1328 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001329 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1330 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1331 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1332 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001333
1334 a.vbroadcastss(A::ymm8, A::xmm0);
1335 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001336 },{
1337 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1338 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1339 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1340 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1341 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001342
1343 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1344 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001345 });
1346
1347 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001348 A::Label l;
1349 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001350 a.jne(&l);
1351 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001352 a.je (&l);
1353 a.jmp(&l);
1354 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001355 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001356
Mike Kleinc15c9362020-04-16 11:10:36 -05001357 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001358 a.cmp(A::rax, 12);
1359 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001360 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001361 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1362 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1363 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1364 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1365 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001366 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001367
Mike Kleinc15c9362020-04-16 11:10:36 -05001368 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001369 0x48,0x83,0xf8,0x0c,
1370 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001371 });
Mike Klein120d9e82019-06-21 15:52:55 -05001372
1373 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001374 a.vmovups(A::ymm5, A::Mem{A::rsi});
1375 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001376
Mike Klein400ba222020-06-30 15:54:19 -05001377 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001378 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001379
Mike Kleinedc2dac2020-04-15 16:18:27 -05001380 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1381 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001382
Mike Klein8390f2e2020-04-15 17:03:08 -05001383 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001384 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001385 /* VEX */ /*Op*/ /* ModRM */
1386 0xc5, 0xfc, 0x10, 0b00'101'110,
1387 0xc5, 0xfc, 0x11, 0b00'101'110,
1388
Mike Klein400ba222020-06-30 15:54:19 -05001389 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001390 0xc5, 0xf8, 0x11, 0b00'101'110,
1391
Mike Klein52010b72019-08-02 11:18:00 -05001392 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001393 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001394
1395 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001396 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001397
1398 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001399 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1400 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1401 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001402
Mike Kleinedc2dac2020-04-15 16:18:27 -05001403 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1404 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1405 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001406 },{
1407 0xc5,0xfc,0x10,0x2c,0x24,
1408 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1409 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1410
1411 0xc5,0xfc,0x11,0x2c,0x24,
1412 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1413 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1414 });
1415
1416 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001417 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1418 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1419 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1420 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1421 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001422
Mike Kleinc15c9362020-04-16 11:10:36 -05001423 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1424 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1425 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1426 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1427 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001428
Mike Klein8390f2e2020-04-15 17:03:08 -05001429 a.vmovd(A::Mem{A::rax}, A::xmm0);
1430 a.vmovd(A::Mem{A::rax}, A::xmm8);
1431 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1432
1433 a.vmovd(A::xmm0, A::Mem{A::rax});
1434 a.vmovd(A::xmm8, A::Mem{A::rax});
1435 a.vmovd(A::xmm0, A::Mem{A::r8 });
1436
1437 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1438 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1439 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1440
Mike Klein35b97c32019-07-12 12:32:45 -05001441 a.vmovd(A::rax, A::xmm0);
1442 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001443 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001444
1445 a.vmovd(A::xmm0, A::rax);
1446 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001447 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001448
Mike Kleinc15c9362020-04-16 11:10:36 -05001449 a.movb(A::Mem{A::rdx}, A::rax);
1450 a.movb(A::Mem{A::rdx}, A::r8 );
1451 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001452
Mike Kleinc15c9362020-04-16 11:10:36 -05001453 a.movb(A::rdx, A::Mem{A::rax});
1454 a.movb(A::rdx, A::Mem{A::r8 });
1455 a.movb(A::r8 , A::Mem{A::rax});
1456
1457 a.movb(A::rdx, 12);
1458 a.movb(A::rax, 4);
1459 a.movb(A::r8 , -1);
1460
1461 a.movb(A::Mem{A::rdx}, 12);
1462 a.movb(A::Mem{A::rax}, 4);
1463 a.movb(A::Mem{A::r8 }, -1);
1464 },{
1465 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1466 0x49,0x0f,0xb6,0x00,
1467 0x4c,0x0f,0xb6,0x06,
1468 0x4c,0x0f,0xb6,0x46, 12,
1469 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1470
1471 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1472 0x49,0x0f,0xb7,0x00,
1473 0x4c,0x0f,0xb7,0x06,
1474 0x4c,0x0f,0xb7,0x46, 12,
1475 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001476
Mike Klein35b97c32019-07-12 12:32:45 -05001477 0xc5,0xf9,0x7e,0x00,
1478 0xc5,0x79,0x7e,0x00,
1479 0xc4,0xc1,0x79,0x7e,0x00,
1480
1481 0xc5,0xf9,0x6e,0x00,
1482 0xc5,0x79,0x6e,0x00,
1483 0xc4,0xc1,0x79,0x6e,0x00,
1484
Mike Klein93d3fab2020-01-14 10:46:44 -06001485 0xc5,0xf9,0x6e,0x04,0x88,
1486 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1487 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1488
Mike Klein35b97c32019-07-12 12:32:45 -05001489 0xc5,0xf9,0x7e,0xc0,
1490 0xc5,0x79,0x7e,0xc0,
1491 0xc4,0xc1,0x79,0x7e,0xc0,
1492
1493 0xc5,0xf9,0x6e,0xc0,
1494 0xc5,0x79,0x6e,0xc0,
1495 0xc4,0xc1,0x79,0x6e,0xc0,
1496
Mike Kleinc15c9362020-04-16 11:10:36 -05001497 0x48 ,0x88, 0x02,
1498 0x4c, 0x88, 0x02,
1499 0x49, 0x88, 0x00,
1500
1501 0x48 ,0x8a, 0x10,
1502 0x49, 0x8a, 0x10,
1503 0x4c, 0x8a, 0x00,
1504
1505 0x48, 0xc6, 0xc2, 0x0c,
1506 0x48, 0xc6, 0xc0, 0x04,
1507 0x49, 0xc6, 0xc0, 0xff,
1508
1509 0x48, 0xc6, 0x02, 0x0c,
1510 0x48, 0xc6, 0x00, 0x04,
1511 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001512 });
1513
1514 test_asm(r, [&](A& a) {
Mike Klein4ecc9702020-07-30 10:03:10 -05001515 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1516 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8
1517
Mike Klein8390f2e2020-04-15 17:03:08 -05001518 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1519 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001520
Mike Klein8390f2e2020-04-15 17:03:08 -05001521 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
Mike Klein4ecc9702020-07-30 10:03:10 -05001522 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001523
Mike Klein21e85eb2020-04-17 13:57:13 -05001524 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1525 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1526
1527 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1528 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1529
Mike Klein8390f2e2020-04-15 17:03:08 -05001530 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1531 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001532
Mike Klein8390f2e2020-04-15 17:03:08 -05001533 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1534 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001535 },{
Mike Klein4ecc9702020-07-30 10:03:10 -05001536 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1537 0xc4,0x43,0x71, 0x22, 0x00, 3,
1538
Mike Klein52010b72019-08-02 11:18:00 -05001539 0xc5,0xb9, 0xc4, 0x0e, 4,
1540 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1541
Mike Klein35b97c32019-07-12 12:32:45 -05001542 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1543 0xc4,0x43,0x71, 0x20, 0x00, 12,
1544
Mike Klein21e85eb2020-04-17 13:57:13 -05001545 0xc4,0x63,0x7d,0x39,0xc1, 1,
1546 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1547
1548 0xc4,0x63,0x79,0x16,0x06, 3,
1549 0xc4,0xc3,0x79,0x16,0x08, 2,
1550
Mike Klein95529e82019-08-02 11:43:43 -05001551 0xc4,0x63,0x79, 0x15, 0x06, 7,
1552 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1553
Mike Klein35b97c32019-07-12 12:32:45 -05001554 0xc4,0x63,0x79, 0x14, 0x06, 7,
1555 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1556 });
1557
1558 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001559 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1560 },{
1561 0xc5, 0x9d, 0xdf, 0xda,
1562 });
Mike Klein9f4df802019-06-24 18:47:16 -04001563
Mike Kleind4546d62019-07-30 12:15:40 -05001564 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001565 A::Label l;
1566 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1567
1568 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1569 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1570 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1571
1572 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1573 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1574
1575 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1576 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1577 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1578 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1579 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1580
1581 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1582 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1583 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1584
Mike Kleind4546d62019-07-30 12:15:40 -05001585 a.vcvttps2dq(A::ymm3, A::ymm2);
1586 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001587 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001588 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001589 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001590 },{
1591 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001592
1593 0xc5,0xfd,0x6f,0x1e,
1594 0xc5,0xfd,0x6f,0x1c,0x24,
1595 0xc4,0xc1,0x7d,0x6f,0x1b,
1596
1597 0xc5,0xfd,0x6f,0x5e,0x04,
1598 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1599
1600 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1601 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1602 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1603 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1604 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1605
1606 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1607 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1608
1609 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1610
Mike Kleind4546d62019-07-30 12:15:40 -05001611 0xc5,0xfe,0x5b,0xda,
1612 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001613 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001614 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001615 });
1616
Mike Kleinbeaa1082020-01-13 14:04:18 -06001617 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001618 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1619 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1620
1621 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1622 a.vcvtph2ps(A::ymm2, A::xmm3);
1623 },{
1624 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1625 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1626
1627 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1628 0xc4,0xe2,0x7d,0x13,0xd3,
1629 });
1630
1631 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001632 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1633 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1634 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1635 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1636 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1637 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1638 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1639 },{
1640 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1641 0xc4,0xe2,0x75,0x92,0x04,0x10,
1642 0xc4,0x62,0x75,0x92,0x14,0x10,
1643 0xc4,0xa2,0x75,0x92,0x04,0x20,
1644 0xc4,0xc2,0x75,0x92,0x04,0x11,
1645 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1646 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1647 });
1648
Mike Kleinc322f632020-01-13 16:18:58 -06001649 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001650 a.mov(A::rax, A::Mem{A::rdi, 0});
1651 a.mov(A::rax, A::Mem{A::rdi, 1});
1652 a.mov(A::rax, A::Mem{A::rdi, 512});
1653 a.mov(A::r15, A::Mem{A::r13, 42});
1654 a.mov(A::rax, A::Mem{A::r13, 42});
1655 a.mov(A::r15, A::Mem{A::rax, 42});
1656 a.mov(A::rax, 1);
1657 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001658 },{
1659 0x48, 0x8b, 0x07,
1660 0x48, 0x8b, 0x47, 0x01,
1661 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1662 0x4d, 0x8b, 0x7d, 0x2a,
1663 0x49, 0x8b, 0x45, 0x2a,
1664 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001665 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1666 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001667 });
1668
Mike Klein9f4df802019-06-24 18:47:16 -04001669 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1670
1671 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001672 a.and16b(A::v4, A::v3, A::v1);
1673 a.orr16b(A::v4, A::v3, A::v1);
1674 a.eor16b(A::v4, A::v3, A::v1);
1675 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001676 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001677 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001678
1679 a.add4s(A::v4, A::v3, A::v1);
1680 a.sub4s(A::v4, A::v3, A::v1);
1681 a.mul4s(A::v4, A::v3, A::v1);
1682
Mike Klein97afd2e2019-10-16 14:11:27 -05001683 a.cmeq4s(A::v4, A::v3, A::v1);
1684 a.cmgt4s(A::v4, A::v3, A::v1);
1685
Mike Klein65809142019-06-25 09:44:02 -04001686 a.sub8h(A::v4, A::v3, A::v1);
1687 a.mul8h(A::v4, A::v3, A::v1);
1688
Mike Klein9f4df802019-06-24 18:47:16 -04001689 a.fadd4s(A::v4, A::v3, A::v1);
1690 a.fsub4s(A::v4, A::v3, A::v1);
1691 a.fmul4s(A::v4, A::v3, A::v1);
1692 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001693 a.fmin4s(A::v4, A::v3, A::v1);
1694 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein7c0332c2020-03-05 14:18:04 -06001695 a.fneg4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001696
Mike Klein65809142019-06-25 09:44:02 -04001697 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001698 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001699
1700 a.fcmeq4s(A::v4, A::v3, A::v1);
1701 a.fcmgt4s(A::v4, A::v3, A::v1);
1702 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001703 },{
Mike Klein65809142019-06-25 09:44:02 -04001704 0x64,0x1c,0x21,0x4e,
1705 0x64,0x1c,0xa1,0x4e,
1706 0x64,0x1c,0x21,0x6e,
1707 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001708 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001709 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001710
1711 0x64,0x84,0xa1,0x4e,
1712 0x64,0x84,0xa1,0x6e,
1713 0x64,0x9c,0xa1,0x4e,
1714
Mike Klein97afd2e2019-10-16 14:11:27 -05001715 0x64,0x8c,0xa1,0x6e,
1716 0x64,0x34,0xa1,0x4e,
1717
Mike Klein65809142019-06-25 09:44:02 -04001718 0x64,0x84,0x61,0x6e,
1719 0x64,0x9c,0x61,0x4e,
1720
Mike Klein9f4df802019-06-24 18:47:16 -04001721 0x64,0xd4,0x21,0x4e,
1722 0x64,0xd4,0xa1,0x4e,
1723 0x64,0xdc,0x21,0x6e,
1724 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001725 0x64,0xf4,0xa1,0x4e,
1726 0x64,0xf4,0x21,0x4e,
Mike Klein7c0332c2020-03-05 14:18:04 -06001727 0x64,0xf8,0xa0,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001728
Mike Klein65809142019-06-25 09:44:02 -04001729 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001730 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001731
1732 0x64,0xe4,0x21,0x4e,
1733 0x64,0xe4,0xa1,0x6e,
1734 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001735 });
1736
1737 test_asm(r, [&](A& a) {
1738 a.shl4s(A::v4, A::v3, 0);
1739 a.shl4s(A::v4, A::v3, 1);
1740 a.shl4s(A::v4, A::v3, 8);
1741 a.shl4s(A::v4, A::v3, 16);
1742 a.shl4s(A::v4, A::v3, 31);
1743
1744 a.sshr4s(A::v4, A::v3, 1);
1745 a.sshr4s(A::v4, A::v3, 8);
1746 a.sshr4s(A::v4, A::v3, 31);
1747
1748 a.ushr4s(A::v4, A::v3, 1);
1749 a.ushr4s(A::v4, A::v3, 8);
1750 a.ushr4s(A::v4, A::v3, 31);
1751
1752 a.ushr8h(A::v4, A::v3, 1);
1753 a.ushr8h(A::v4, A::v3, 8);
1754 a.ushr8h(A::v4, A::v3, 15);
1755 },{
1756 0x64,0x54,0x20,0x4f,
1757 0x64,0x54,0x21,0x4f,
1758 0x64,0x54,0x28,0x4f,
1759 0x64,0x54,0x30,0x4f,
1760 0x64,0x54,0x3f,0x4f,
1761
1762 0x64,0x04,0x3f,0x4f,
1763 0x64,0x04,0x38,0x4f,
1764 0x64,0x04,0x21,0x4f,
1765
1766 0x64,0x04,0x3f,0x6f,
1767 0x64,0x04,0x38,0x6f,
1768 0x64,0x04,0x21,0x6f,
1769
1770 0x64,0x04,0x1f,0x6f,
1771 0x64,0x04,0x18,0x6f,
1772 0x64,0x04,0x11,0x6f,
1773 });
1774
1775 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001776 a.sli4s(A::v4, A::v3, 0);
1777 a.sli4s(A::v4, A::v3, 1);
1778 a.sli4s(A::v4, A::v3, 8);
1779 a.sli4s(A::v4, A::v3, 16);
1780 a.sli4s(A::v4, A::v3, 31);
1781 },{
1782 0x64,0x54,0x20,0x6f,
1783 0x64,0x54,0x21,0x6f,
1784 0x64,0x54,0x28,0x6f,
1785 0x64,0x54,0x30,0x6f,
1786 0x64,0x54,0x3f,0x6f,
1787 });
1788
1789 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001790 a.scvtf4s (A::v4, A::v3);
1791 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001792 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001793 },{
1794 0x64,0xd8,0x21,0x4e,
1795 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001796 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001797 });
Mike Klein15a368d2019-06-26 10:21:12 -04001798
1799 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001800 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1801 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1802 a.strq(A::v1, A::sp); // str q1, [sp]
1803 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
1804 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1805 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
1806 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
1807 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1808 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001809 },{
1810 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001811 0xe0,0x07,0x80,0x3d,
1812 0xe1,0x03,0x80,0x3d,
1813 0xe0,0x1b,0x00,0xbd,
1814 0xe0,0xbf,0x00,0x3d,
1815 0xe9,0xab,0x40,0x3d,
1816 0xe7,0x2b,0x40,0xbd,
1817 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001818 0xff,0x83,0x00,0x91,
1819 });
1820
1821 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001822 a.brk(0);
1823 a.brk(65535);
1824
Mike Klein15a368d2019-06-26 10:21:12 -04001825 a.ret(A::x30); // Conventional ret using link register.
1826 a.ret(A::x13); // Can really return using any register if we like.
1827
1828 a.add(A::x2, A::x2, 4);
1829 a.add(A::x3, A::x2, 32);
1830
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001831 a.sub(A::x2, A::x2, 4);
1832 a.sub(A::x3, A::x2, 32);
1833
Mike Klein15a368d2019-06-26 10:21:12 -04001834 a.subs(A::x2, A::x2, 4);
1835 a.subs(A::x3, A::x2, 32);
1836
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001837 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1838 a.cmp(A::x2, 4);
1839
Mike Kleinc74db792020-05-11 11:57:12 -05001840 A::Label l;
1841 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001842 a.bne(&l);
1843 a.bne(&l);
1844 a.blt(&l);
1845 a.b(&l);
1846 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001847 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001848 },{
Mike Klein37be7712019-11-13 13:19:01 -06001849 0x00,0x00,0x20,0xd4,
1850 0xe0,0xff,0x3f,0xd4,
1851
Mike Klein15a368d2019-06-26 10:21:12 -04001852 0xc0,0x03,0x5f,0xd6,
1853 0xa0,0x01,0x5f,0xd6,
1854
1855 0x42,0x10,0x00,0x91,
1856 0x43,0x80,0x00,0x91,
1857
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001858 0x42,0x10,0x00,0xd1,
1859 0x43,0x80,0x00,0xd1,
1860
Mike Klein15a368d2019-06-26 10:21:12 -04001861 0x42,0x10,0x00,0xf1,
1862 0x43,0x80,0x00,0xf1,
1863
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001864 0x5f,0x10,0x00,0xf1,
1865 0x5f,0x10,0x00,0xf1,
1866
1867 0x01,0x00,0x00,0x54, // b.ne #0
1868 0xe1,0xff,0xff,0x54, // b.ne #-4
1869 0xcb,0xff,0xff,0x54, // b.lt #-8
1870 0xae,0xff,0xff,0x54, // b.al #-12
1871 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1872 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001873 });
Mike Kleine51632e2019-06-26 14:47:43 -04001874
Mike Kleince7b88c2019-07-11 14:06:40 -05001875 // Can we cbz() to a not-yet-defined label?
1876 test_asm(r, [&](A& a) {
1877 A::Label l;
1878 a.cbz(A::x2, &l);
1879 a.add(A::x3, A::x2, 32);
1880 a.label(&l);
1881 a.ret(A::x30);
1882 },{
1883 0x42,0x00,0x00,0xb4, // cbz x2, #8
1884 0x43,0x80,0x00,0x91, // add x3, x2, #32
1885 0xc0,0x03,0x5f,0xd6, // ret
1886 });
1887
1888 // If we start a label as a backward label,
1889 // can we redefine it to be a future label?
1890 // (Not sure this is useful... just want to test it works.)
1891 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001892 A::Label l1;
1893 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001894 a.add(A::x3, A::x2, 32);
1895 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1896
Mike Kleinc74db792020-05-11 11:57:12 -05001897 A::Label l2; // Start off the same...
1898 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001899 a.add(A::x3, A::x2, 32);
1900 a.cbz(A::x2, &l2); // Looks like this will go backward...
1901 a.add(A::x2, A::x2, 4);
1902 a.add(A::x3, A::x2, 32);
1903 a.label(&l2); // But no... actually forward! What a switcheroo!
1904 },{
1905 0x43,0x80,0x00,0x91, // add x3, x2, #32
1906 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1907
1908 0x43,0x80,0x00,0x91, // add x3, x2, #32
1909 0x62,0x00,0x00,0xb4, // cbz x2, #12
1910 0x42,0x10,0x00,0x91, // add x2, x2, #4
1911 0x43,0x80,0x00,0x91, // add x3, x2, #32
1912 });
1913
Mike Klein81d52672019-07-30 11:11:09 -05001914 // Loading from a label on ARM.
1915 test_asm(r, [&](A& a) {
1916 A::Label fore,aft;
1917 a.label(&fore);
1918 a.word(0x01234567);
1919 a.ldrq(A::v1, &fore);
1920 a.ldrq(A::v2, &aft);
1921 a.label(&aft);
1922 a.word(0x76543210);
1923 },{
1924 0x67,0x45,0x23,0x01,
1925 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1926 0x22,0x00,0x00,0x9c, // ldr q2, #4
1927 0x10,0x32,0x54,0x76,
1928 });
1929
Mike Kleine51632e2019-06-26 14:47:43 -04001930 test_asm(r, [&](A& a) {
1931 a.ldrq(A::v0, A::x8);
1932 a.strq(A::v0, A::x8);
1933 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001934 0x00,0x01,0xc0,0x3d,
1935 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001936 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001937
1938 test_asm(r, [&](A& a) {
1939 a.xtns2h(A::v0, A::v0);
1940 a.xtnh2b(A::v0, A::v0);
1941 a.strs (A::v0, A::x0);
1942
1943 a.ldrs (A::v0, A::x0);
1944 a.uxtlb2h(A::v0, A::v0);
1945 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001946
1947 a.uminv4s(A::v3, A::v4);
1948 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001949 },{
1950 0x00,0x28,0x61,0x0e,
1951 0x00,0x28,0x21,0x0e,
1952 0x00,0x00,0x00,0xbd,
1953
1954 0x00,0x00,0x40,0xbd,
1955 0x00,0xa4,0x08,0x2f,
1956 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001957
1958 0x83,0xa8,0xb1,0x6e,
1959 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001960 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001961
1962 test_asm(r, [&](A& a) {
1963 a.ldrb(A::v0, A::x8);
1964 a.strb(A::v0, A::x8);
1965 },{
1966 0x00,0x01,0x40,0x3d,
1967 0x00,0x01,0x00,0x3d,
1968 });
Mike Klein81d52672019-07-30 11:11:09 -05001969
1970 test_asm(r, [&](A& a) {
1971 a.tbl(A::v0, A::v1, A::v2);
1972 },{
1973 0x20,0x00,0x02,0x4e,
1974 });
Mike Klein05642042019-06-18 12:16:06 -05001975}
Mike Reedbcb46c02020-03-23 17:51:01 -04001976
1977DEF_TEST(SkVM_approx_math, r) {
1978 auto eval = [](int N, float values[], auto fn) {
1979 skvm::Builder b;
1980 skvm::Arg inout = b.varying<float>();
1981
1982 b.storeF(inout, fn(&b, b.loadF(inout)));
1983
1984 b.done().eval(N, values);
1985 };
1986
1987 auto compare = [r](int N, const float values[], const float expected[]) {
1988 for (int i = 0; i < N; ++i) {
1989 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1990 }
1991 };
1992
1993 // log2
1994 {
1995 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1996 constexpr int N = SK_ARRAY_COUNT(values);
1997 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1998 return b->approx_log2(v);
1999 });
2000 const float expected[] = {-2, -1, 0, 1, 2, 3};
2001 compare(N, values, expected);
2002 }
2003
2004 // pow2
2005 {
2006 float values[] = {-2, -1, 0, 1, 2, 3};
2007 constexpr int N = SK_ARRAY_COUNT(values);
2008 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2009 return b->approx_pow2(v);
2010 });
2011 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
2012 compare(N, values, expected);
2013 }
2014
2015 // powf -- x^0.5
2016 {
2017 float bases[] = {0, 1, 4, 9, 16};
2018 constexpr int N = SK_ARRAY_COUNT(bases);
2019 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2020 return b->approx_powf(base, b->splat(0.5f));
2021 });
2022 const float expected[] = {0, 1, 2, 3, 4};
2023 compare(N, bases, expected);
2024 }
2025 // powf -- 3^x
2026 {
2027 float exps[] = {-2, -1, 0, 1, 2};
2028 constexpr int N = SK_ARRAY_COUNT(exps);
2029 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2030 return b->approx_powf(b->splat(3.0f), exp);
2031 });
2032 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
2033 compare(N, exps, expected);
2034 }
Mike Reed82ff25e2020-04-07 13:51:41 -04002035
Mike Reedd468a162020-04-11 14:14:00 -04002036 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04002037 skvm::Builder b;
2038 skvm::Arg inout = b.varying<float>();
2039 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04002040 float actual = arg;
2041 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04002042
Mike Reedd468a162020-04-11 14:14:00 -04002043 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04002044
2045 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04002046 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04002047 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04002048 }
Mike Reed1b84ef22020-04-13 17:56:24 -04002049 return err;
2050 };
2051
2052 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2053 skvm::Builder b;
2054 skvm::Arg in0 = b.varying<float>();
2055 skvm::Arg in1 = b.varying<float>();
2056 skvm::Arg out = b.varying<float>();
2057 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2058 float actual;
2059 b.done().eval(1, &arg0, &arg1, &actual);
2060
2061 float err = std::abs(actual - expected);
2062
2063 if (err > tolerance) {
2064 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2065 REPORTER_ASSERT(r, true);
2066 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002067 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002068 };
2069
Mike Reed801ba0d2020-04-10 12:37:36 -04002070 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002071 {
2072 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002073 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002074 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2075 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2076 return approx_sin(x);
2077 });
2078 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2079 return approx_cos(x);
2080 });
2081 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002082
2083 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2084 // so bring in the domain a little.
2085 constexpr float eps = 0.16f;
2086 float err = 0;
2087 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2088 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2089 return approx_tan(x);
2090 });
2091 // try again with some multiples of P, to check our periodicity
2092 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2093 return approx_tan(x + 3*P);
2094 });
2095 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2096 return approx_tan(x - 3*P);
2097 });
2098 }
Mike Reedd468a162020-04-11 14:14:00 -04002099 if (0) { SkDebugf("tan error %g\n", err); }
2100 }
2101
2102 // asin, acos, atan
2103 {
2104 constexpr float tol = 0.00175f;
2105 float err = 0;
2106 for (float x = -1; x <= 1; x += 1.0f/64) {
2107 err += test(x, asin(x), tol, [](skvm::F32 x) {
2108 return approx_asin(x);
2109 });
2110 test(x, acos(x), tol, [](skvm::F32 x) {
2111 return approx_acos(x);
2112 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002113 }
Mike Reedd468a162020-04-11 14:14:00 -04002114 if (0) { SkDebugf("asin error %g\n", err); }
2115
2116 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002117 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002118 err += test(x, atan(x), tol, [](skvm::F32 x) {
2119 return approx_atan(x);
2120 });
2121 }
2122 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002123
2124 for (float y = -3; y <= 3; y += 1) {
2125 for (float x = -3; x <= 3; x += 1) {
2126 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002127 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002128 });
2129 }
2130 }
2131 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002132 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002133}
Mike Klein210288f2020-04-08 11:31:07 -05002134
2135DEF_TEST(SkVM_min_max, r) {
2136 // min() and max() have subtle behavior when one argument is NaN and
2137 // the other isn't. It's not sound to blindly swap their arguments.
2138 //
2139 // All backends must behave like std::min() and std::max(), which are
2140 //
2141 // min(x,y) = y<x ? y : x
2142 // max(x,y) = x<y ? y : x
2143
2144 // ±NaN, ±0, ±1, ±inf
2145 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2146 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2147
2148 float f[8];
2149 memcpy(f, bits, sizeof(bits));
2150
2151 auto identical = [&](float x, float y) {
2152 uint32_t X,Y;
2153 memcpy(&X, &x, 4);
2154 memcpy(&Y, &y, 4);
2155 return X == Y;
2156 };
2157
2158 // Test min/max with non-constant x, non-constant y.
2159 // (Whether x and y are varying or uniform shouldn't make any difference.)
2160 {
2161 skvm::Builder b;
2162 {
2163 skvm::Arg src = b.varying<float>(),
2164 mn = b.varying<float>(),
2165 mx = b.varying<float>();
2166
2167 skvm::F32 x = b.loadF(src),
2168 y = b.uniformF(b.uniform(), 0);
2169
2170 b.storeF(mn, b.min(x,y));
2171 b.storeF(mx, b.max(x,y));
2172 }
2173
Mike Klein10fc1e62020-04-13 11:57:05 -05002174 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002175 float mn[8], mx[8];
2176 for (int i = 0; i < 8; i++) {
2177 // min() and max() everything with f[i].
2178 program.eval(8, f,mn,mx, &f[i]);
2179
2180 for (int j = 0; j < 8; j++) {
2181 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2182 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2183 }
2184 }
2185 });
2186 }
2187
2188 // Test each with constant on the right.
2189 for (int i = 0; i < 8; i++) {
2190 skvm::Builder b;
2191 {
2192 skvm::Arg src = b.varying<float>(),
2193 mn = b.varying<float>(),
2194 mx = b.varying<float>();
2195
2196 skvm::F32 x = b.loadF(src),
2197 y = b.splat(f[i]);
2198
2199 b.storeF(mn, b.min(x,y));
2200 b.storeF(mx, b.max(x,y));
2201 }
2202
Mike Klein10fc1e62020-04-13 11:57:05 -05002203 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002204 float mn[8], mx[8];
2205 program.eval(8, f,mn,mx);
2206 for (int j = 0; j < 8; j++) {
2207 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2208 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2209 }
2210 });
2211 }
2212
2213 // Test each with constant on the left.
2214 for (int i = 0; i < 8; i++) {
2215 skvm::Builder b;
2216 {
2217 skvm::Arg src = b.varying<float>(),
2218 mn = b.varying<float>(),
2219 mx = b.varying<float>();
2220
2221 skvm::F32 x = b.splat(f[i]),
2222 y = b.loadF(src);
2223
2224 b.storeF(mn, b.min(x,y));
2225 b.storeF(mx, b.max(x,y));
2226 }
2227
Mike Klein10fc1e62020-04-13 11:57:05 -05002228 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002229 float mn[8], mx[8];
2230 program.eval(8, f,mn,mx);
2231 for (int j = 0; j < 8; j++) {
2232 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2233 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2234 }
2235 });
2236 }
2237}
Mike Klein4d680cd2020-07-15 09:58:51 -05002238
2239DEF_TEST(SkVM_halfs, r) {
2240 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2241 0xc400,0xb800,0xbc00,0xc000};
2242 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2243 -4.0f,-0.5f,-1.0f,-2.0f};
2244 {
2245 skvm::Builder b;
2246 skvm::Arg src = b.varying<uint16_t>(),
2247 dst = b.varying<float>();
2248 b.storeF(dst, b.from_half(b.load16(src)));
2249
2250 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2251 float dst[8];
2252 program.eval(8, hs, dst);
2253 for (int i = 0; i < 8; i++) {
2254 REPORTER_ASSERT(r, dst[i] == fs[i]);
2255 }
2256 });
2257 }
2258 {
2259 skvm::Builder b;
2260 skvm::Arg src = b.varying<float>(),
2261 dst = b.varying<uint16_t>();
2262 b.store16(dst, b.to_half(b.loadF(src)));
2263
2264 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2265 uint16_t dst[8];
2266 program.eval(8, fs, dst);
2267 for (int i = 0; i < 8; i++) {
2268 REPORTER_ASSERT(r, dst[i] == hs[i]);
2269 }
2270 });
2271 }
2272}
Mike Klein6732da02020-07-16 13:03:18 -05002273
2274DEF_TEST(SkVM_64bit, r) {
2275 uint32_t lo[65],
2276 hi[65];
2277 uint64_t wide[65];
2278 for (int i = 0; i < 65; i++) {
2279 lo[i] = 2*i+0;
2280 hi[i] = 2*i+1;
2281 wide[i] = ((uint64_t)lo[i] << 0)
2282 | ((uint64_t)hi[i] << 32);
2283 }
2284
2285 {
2286 skvm::Builder b;
2287 {
2288 skvm::Arg wide = b.varying<uint64_t>(),
2289 lo = b.varying<int>(),
2290 hi = b.varying<int>();
Mike Klein31367892020-07-30 08:19:12 -05002291 b.store32(lo, b.load64(wide, 0));
2292 b.store32(hi, b.load64(wide, 1));
Mike Klein6732da02020-07-16 13:03:18 -05002293 }
2294 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2295 uint32_t l[65], h[65];
2296 program.eval(65, wide,l,h);
2297 for (int i = 0; i < 65; i++) {
2298 REPORTER_ASSERT(r, l[i] == lo[i]);
2299 REPORTER_ASSERT(r, h[i] == hi[i]);
2300 }
2301 });
2302 }
2303
2304 {
2305 skvm::Builder b;
2306 {
2307 skvm::Arg wide = b.varying<uint64_t>(),
2308 lo = b.varying<int>(),
2309 hi = b.varying<int>();
2310 b.store64(wide, b.load32(lo), b.load32(hi));
2311 }
2312 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2313 uint64_t w[65];
2314 program.eval(65, w,lo,hi);
2315 for (int i = 0; i < 65; i++) {
2316 REPORTER_ASSERT(r, w[i] == wide[i]);
2317 }
2318 });
2319 }
2320}
Mike Kleine942b8c2020-07-21 10:17:14 -05002321
2322DEF_TEST(SkVM_is_NaN_is_finite, r) {
2323 skvm::Builder b;
2324 {
2325 skvm::Arg src = b.varying<float>(),
2326 nan = b.varying<int>(),
2327 fin = b.varying<int>();
2328 b.store32(nan, is_NaN (b.loadF(src)));
2329 b.store32(fin, is_finite(b.loadF(src)));
2330 }
2331 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2332 // ±NaN, ±0, ±1, ±inf
2333 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2334 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2335 uint32_t nan[8], fin[8];
2336 program.eval(8, bits, nan,fin);
2337
2338 for (int i = 0; i < 8; i++) {
2339 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2340 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2341 i == 4 || i == 5) ? 0xffffffff : 0));
2342 }
2343 });
2344}
Mike Klein0cfd5032020-07-28 11:08:27 -05002345
2346DEF_TEST(SkVM_args, r) {
2347 // Test we can handle at least six arguments.
2348 skvm::Builder b;
2349 {
2350 skvm::Arg dst = b.varying<float>(),
2351 A = b.varying<float>(),
2352 B = b.varying<float>(),
2353 C = b.varying<float>(),
2354 D = b.varying<float>(),
2355 E = b.varying<float>();
2356 storeF(dst, b.loadF(A)
2357 + b.loadF(B)
2358 + b.loadF(C)
2359 + b.loadF(D)
2360 + b.loadF(E));
2361 }
2362
2363 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2364 float dst[17],A[17],B[17],C[17],D[17],E[17];
2365 for (int i = 0; i < 17; i++) {
2366 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2367 }
2368 program.eval(17, dst,A,B,C,D,E);
2369 for (int i = 0; i < 17; i++) {
2370 REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2371 }
2372 });
2373}
Mike Klein9791e502020-09-15 12:43:38 -05002374
2375DEF_TEST(SkVM_Q14x2, r) {
2376 // Some nice round Q14 test values, from 0.0 out to ±1.0 (0x4000, 0xc000) by 16ths (0x0400).
2377 const uint32_t src[] = {
2378 0x0000'0000, 0xfc00'0400, 0xf800'0800, 0xf400'0c00,
2379 0xf000'1000, 0xec00'1400, 0xe800'1800, 0xe400'1c00,
2380 0xe000'2000, 0xdc00'2400, 0xd800'2800, 0xd400'2c00,
2381 0xd000'3000, 0xcc00'3400, 0xc800'3800, 0xc400'3c00, 0xc000'4000
2382 };
2383 for (int i = 0; i < 17; i++) {
2384 // Just showing our work how we got those values.
2385 int16_t x = i * (+1/16.0f) * 0x4000;
2386 REPORTER_ASSERT(r, src[i] == (uint32_t)(x|-x<<16));
2387 }
2388
2389 // These test cases are essentially mechanically generated to get coverage...
2390 // I've spot checked here and there and things seem correct, but I wouldn't
2391 // be surprised to find that there were bugs. Using nice round numbers to
2392 // avoid having to think about low-bit precision for now.
2393 struct {
2394 skvm::Q14x2 (*fn)(skvm::Q14x2);
2395 uint32_t expected[17];
2396 } cases[] = {
2397 {[](skvm::Q14x2 x) { return x; }, // Just double checking the test harness works.
2398 {0x00000000, 0xfc000400, 0xf8000800, 0xf4000c00,
2399 0xf0001000, 0xec001400, 0xe8001800, 0xe4001c00,
2400 0xe0002000, 0xdc002400, 0xd8002800, 0xd4002c00,
2401 0xd0003000, 0xcc003400, 0xc8003800, 0xc4003c00, 0xc0004000}},
2402
2403 {[](skvm::Q14x2 x) { return x*x; }, // square ±1/16 (0x0400) -> 1/256 (0x0040), etc.
2404 {0x00000000, 0x00400040, 0x01000100, 0x02400240,
2405 0x04000400, 0x06400640, 0x09000900, 0x0c400c40,
2406 0x10001000, 0x14401440, 0x19001900, 0x1e401e40,
2407 0x24002400, 0x2a402a40, 0x31003100, 0x38403840, 0x40004000}},
2408
2409 {[](skvm::Q14x2 x) { return x>>1; }, // divide by 2
2410 {0x00000000, 0xfe000200, 0xfc000400, 0xfa000600,
2411 0xf8000800, 0xf6000a00, 0xf4000c00, 0xf2000e00,
2412 0xf0001000, 0xee001200, 0xec001400, 0xea001600,
2413 0xe8001800, 0xe6001a00, 0xe4001c00, 0xe2001e00, 0xe0002000}},
2414
2415 {[](skvm::Q14x2 x) { return shr(x,1); }, // logical shift by 1
2416 {0x00000000, 0x7e000200, 0x7c000400, 0x7a000600,
2417 0x78000800, 0x76000a00, 0x74000c00, 0x72000e00,
2418 0x70001000, 0x6e001200, 0x6c001400, 0x6a001600,
2419 0x68001800, 0x66001a00, 0x64001c00, 0x62001e00, 0x60002000}},
2420
2421 {[](skvm::Q14x2 x) { return x - (x>>2); }, // 3/4 x, version A
2422 {0x00000000, 0xfd000300, 0xfa000600, 0xf7000900,
2423 0xf4000c00, 0xf1000f00, 0xee001200, 0xeb001500,
2424 0xe8001800, 0xe5001b00, 0xe2001e00, 0xdf002100,
2425 0xdc002400, 0xd9002700, 0xd6002a00, 0xd3002d00, 0xd0003000}},
2426
2427 {[](skvm::Q14x2 x) { return (x>>1) + (x>>2); }, // 3/4 x, version B
2428 {0x00000000, 0xfd000300, 0xfa000600, 0xf7000900,
2429 0xf4000c00, 0xf1000f00, 0xee001200, 0xeb001500,
2430 0xe8001800, 0xe5001b00, 0xe2001e00, 0xdf002100,
2431 0xdc002400, 0xd9002700, 0xd6002a00, 0xd3002d00, 0xd0003000}},
2432
2433 {[](skvm::Q14x2 x) { return ((x>>2) + (x>>3))<<1; }, // 3/4 x, version C
2434 {0x00000000, 0xfd000300, 0xfa000600, 0xf7000900,
2435 0xf4000c00, 0xf1000f00, 0xee001200, 0xeb001500,
2436 0xe8001800, 0xe5001b00, 0xe2001e00, 0xdf002100,
2437 0xdc002400, 0xd9002700, 0xd6002a00, 0xd3002d00, 0xd0003000}},
2438
2439 // TODO: I'm not sure if this one is working correctly or not. Should only work for >=0?
2440 {[](skvm::Q14x2 x) { return unsigned_avg(x, x>>1); }, // 3/4 x, version D
2441 {0x00000000, 0xfd000300, 0xfa000600, 0xf7000900,
2442 0xf4000c00, 0xf1000f00, 0xee001200, 0xeb001500,
2443 0xe8001800, 0xe5001b00, 0xe2001e00, 0xdf002100,
2444 0xdc002400, 0xd9002700, 0xd6002a00, 0xd3002d00, 0xd0003000}},
2445
2446 {[](skvm::Q14x2 x) { return min(x, +0.5f); }, // clamp down to 0x2000, version A
2447 {0x00000000, 0xfc000400, 0xf8000800, 0xf4000c00,
2448 0xf0001000, 0xec001400, 0xe8001800, 0xe4001c00,
2449 0xe0002000, 0xdc002000, 0xd8002000, 0xd4002000,
2450 0xd0002000, 0xcc002000, 0xc8002000, 0xc4002000, 0xc0002000}},
2451
2452 {[](skvm::Q14x2 x) { return select(x < +0.5f, x, +0.5f); }, // clamp down to 0x2000, vB
2453 {0x00000000, 0xfc000400, 0xf8000800, 0xf4000c00,
2454 0xf0001000, 0xec001400, 0xe8001800, 0xe4001c00,
2455 0xe0002000, 0xdc002000, 0xd8002000, 0xd4002000,
2456 0xd0002000, 0xcc002000, 0xc8002000, 0xc4002000, 0xc0002000}},
2457
2458 {[](skvm::Q14x2 x) { return select(x == 1.0f, 0.5f, x); },
2459 {0x00000000, 0xfc000400, 0xf8000800, 0xf4000c00,
2460 0xf0001000, 0xec001400, 0xe8001800, 0xe4001c00,
2461 0xe0002000, 0xdc002400, 0xd8002800, 0xd4002c00,
2462 0xd0003000, 0xcc003400, 0xc8003800, 0xc4003c00, 0xc0002000}},
2463
2464 {[](skvm::Q14x2 x) { return max(x, -0.5f); }, // clamp up to 0xe000
2465 {0x00000000, 0xfc000400, 0xf8000800, 0xf4000c00,
2466 0xf0001000, 0xec001400, 0xe8001800, 0xe4001c00,
2467 0xe0002000, 0xe0002400, 0xe0002800, 0xe0002c00,
2468 0xe0003000, 0xe0003400, 0xe0003800, 0xe0003c00, 0xe0004000}},
2469
2470 // TODO: I had higher hopes for this op until I realized it clamps negative values
2471 // to the upper limit, not zero. Duh. Might end up removing this.
2472 {[](skvm::Q14x2 x) { return unsigned_min(x, 0.5f); }, // clamp around to [0,0x2000]
2473 {0x00000000, 0x20000400, 0x20000800, 0x20000c00,
2474 0x20001000, 0x20001400, 0x20001800, 0x20001c00,
2475 0x20002000, 0x20002000, 0x20002000, 0x20002000,
2476 0x20002000, 0x20002000, 0x20002000, 0x20002000, 0x20002000}},
2477 };
2478
2479 for (const auto& test : cases) {
2480 skvm::Builder b;
2481 {
2482 skvm::Arg dst = b.varying<uint32_t>(),
2483 src = b.varying<uint32_t>();
2484
2485 skvm::Q14x2 x = as_Q14x2(b.load32(src));
2486 store32(dst, as_I32(test.fn(x)));
2487 }
2488
2489 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2490 uint32_t dst[17];
2491 program.eval(17, dst,src);
2492 for (int i = 0; i < 17; i++) {
2493 if (test.expected[16]) {
2494 REPORTER_ASSERT(r, test.expected[i] == dst[i]);
2495 } else {
2496 if (i == 0 || i == 4 || i == 8 || i == 12) SkDebugf("\n");
2497 SkDebugf("0x%08x, ", dst[i]);
2498 }
2499 }
2500 });
2501 }
2502
2503}