blob: d48b632629d8e7fd8aa29674bc61d9f0bb66c54a [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Klein10fc1e62020-04-13 11:57:05 -050036static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
Mike Klein10fc1e62020-04-13 11:57:05 -050037 if (program.hasJIT()) {
Mike Kleinb5a30762019-10-16 10:11:56 -050038 test((const skvm::Program&) program);
39 program.dropJIT();
40 }
Mike Klein10fc1e62020-04-13 11:57:05 -050041 test((const skvm::Program&) program);
Mike Kleinb5a30762019-10-16 10:11:56 -050042}
43
44
Mike Klein68c50d02019-05-29 12:57:54 -050045DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050046 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050047
48 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050049 for (int s = 0; s < 3; s++)
50 for (int d = 0; d < 3; d++) {
51 auto srcFmt = (Fmt)s,
52 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050053 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050054
Mike Klein267f5072019-06-03 16:27:46 -050055 buf.writeText(fmt_name(srcFmt));
56 buf.writeText(" over ");
57 buf.writeText(fmt_name(dstFmt));
58 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050059 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050060 }
Mike Klein68c50d02019-05-29 12:57:54 -050061
Mike Klein7b7077c2019-06-03 17:10:59 -050062 // Write the I32 Srcovers also.
63 {
Mike Kleinaab45b52019-07-02 15:39:23 -050064 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050065 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050066 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050067 }
Mike Klein7b7077c2019-06-03 17:10:59 -050068
Mike Kleinf9963112019-08-08 15:13:25 -040069 {
Mike Kleind48488b2019-10-22 12:27:58 -050070 // Demonstrate the value of program reordering.
71 skvm::Builder b;
72 skvm::Arg sp = b.varying<int>(),
73 dp = b.varying<int>();
74
75 skvm::I32 byte = b.splat(0xff);
76
77 skvm::I32 src = b.load32(sp),
78 sr = b.extract(src, 0, byte),
79 sg = b.extract(src, 8, byte),
80 sb = b.extract(src, 16, byte),
81 sa = b.extract(src, 24, byte);
82
83 skvm::I32 dst = b.load32(dp),
84 dr = b.extract(dst, 0, byte),
85 dg = b.extract(dst, 8, byte),
86 db = b.extract(dst, 16, byte),
87 da = b.extract(dst, 24, byte);
88
89 skvm::I32 R = b.add(sr, dr),
90 G = b.add(sg, dg),
91 B = b.add(sb, db),
92 A = b.add(sa, da);
93
94 skvm::I32 rg = b.pack(R, G, 8),
95 ba = b.pack(B, A, 8),
96 rgba = b.pack(rg, ba, 16);
97
98 b.store32(dp, rgba);
99
100 dump(b, &buf);
101 }
102
Mike Klein238105b2020-03-04 17:05:32 -0600103 // Our checked in dump expectations assume we have FMA support.
Mike Klein10fc1e62020-04-13 11:57:05 -0500104 if (skvm::fma_supported()) {
Ben Wagnere8ffb082020-05-04 10:50:08 -0400105 sk_sp<SkData> actual = buf.detachAsData();
106 bool writeActualAsNewExpectation = false;
Mike Klein238105b2020-03-04 17:05:32 -0600107 {
Mike Klein238105b2020-03-04 17:05:32 -0600108 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Ben Wagnere8ffb082020-05-04 10:50:08 -0400109 if (!expected) {
110 ERRORF(r, "Couldn't load SkVMTest.expected.");
111 writeActualAsNewExpectation = true;
Mike Klein267f5072019-06-03 16:27:46 -0500112
Ben Wagnere8ffb082020-05-04 10:50:08 -0400113 } else if (!expected->equals(actual.get())) {
114 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
Adlai Holler684838f2020-05-12 10:41:04 -0400115 (int)expected->size(), expected->data(),
116 (int)actual->size(), actual->data());
Ben Wagnere8ffb082020-05-04 10:50:08 -0400117 writeActualAsNewExpectation = true;
118 }
119 }
120 if (writeActualAsNewExpectation) {
121 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
122 if (out.isValid()) {
123 out.write(actual->data(), actual->size());
Mike Klein77163312019-06-04 13:35:32 -0500124 }
Mike Klein68c50d02019-05-29 12:57:54 -0500125 }
126 }
127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500129 uint32_t src[9];
130 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500131
Mike Klein10fc1e62020-04-13 11:57:05 -0500132 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500133 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
134 src[i] = 0xbb007733;
135 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500136 }
Mike Klein9977efa2019-07-15 12:22:36 -0500137
138 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
139
140 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
141
142 // dst is probably 0xff2dad72.
143 for (auto got : dst) {
144 auto want = expected;
145 for (int i = 0; i < 4; i++) {
146 uint8_t d = got & 0xff,
147 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500148 if (abs(d-w) >= 2) {
149 SkDebugf("d %02x, w %02x\n", d,w);
150 }
Mike Klein9977efa2019-07-15 12:22:36 -0500151 REPORTER_ASSERT(r, abs(d-w) < 2);
152 got >>= 8;
153 want >>= 8;
154 }
155 }
156 });
Mike Klein3f593792019-06-12 12:54:52 -0500157 };
Mike Klein68c50d02019-05-29 12:57:54 -0500158
Mike Klein37607d42019-07-18 10:17:28 -0500159 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
160 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500161
Mike Klein10fc1e62020-04-13 11:57:05 -0500162 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500163 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500164 uint32_t src[9];
165 uint8_t dst[SK_ARRAY_COUNT(src)];
166
167 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
168 src[i] = 0xbb007733;
169 dst[i] = 0x42;
170 }
171
172 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
173 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500174
175 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
176 SkGetPackedG32(over),
177 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500178 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500179
Mike Klein3f593792019-06-12 12:54:52 -0500180 for (auto got : dst) {
181 REPORTER_ASSERT(r, abs(got-want) < 3);
182 }
Mike Klein9977efa2019-07-15 12:22:36 -0500183 });
Mike Klein68c50d02019-05-29 12:57:54 -0500184
Mike Klein10fc1e62020-04-13 11:57:05 -0500185 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500186 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500187 uint8_t src[256],
188 dst[256];
189 for (int i = 0; i < 256; i++) {
190 src[i] = 255 - i;
191 dst[i] = i;
192 }
193
194 program.eval(256, src, dst);
195
196 for (int i = 0; i < 256; i++) {
197 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
198 SkPackARGB32( i, 0,0,0)));
199 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
200 }
Mike Klein9977efa2019-07-15 12:22:36 -0500201 });
Mike Klein68c50d02019-05-29 12:57:54 -0500202}
Mike Klein81756e42019-06-12 11:36:28 -0500203
Mike Klein7542ab52020-04-02 08:50:16 -0500204DEF_TEST(SkVM_eliminate_dead_code, r) {
205 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400206 {
Mike Klein7542ab52020-04-02 08:50:16 -0500207 skvm::Arg arg = b.varying<int>();
208 skvm::I32 l = b.load32(arg);
209 skvm::I32 a = b.add(l, l);
210 b.add(a, b.splat(7));
211 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400212
Mike Klein7542ab52020-04-02 08:50:16 -0500213 std::vector<skvm::Instruction> program = b.program();
214 REPORTER_ASSERT(r, program.size() == 4);
215
Mike Klein5b701e12020-04-02 10:34:24 -0500216 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500217 REPORTER_ASSERT(r, program.size() == 0);
218}
219
220DEF_TEST(SkVM_Usage, r) {
221 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400222 {
Mike Klein7542ab52020-04-02 08:50:16 -0500223 skvm::Arg arg = b.varying<int>(),
224 buf = b.varying<int>();
225 skvm::I32 l = b.load32(arg);
226 skvm::I32 a = b.add(l, l);
227 skvm::I32 s = b.add(a, b.splat(7));
228 b.store32(buf, s);
Herb Derbyf20400e2020-03-18 16:11:25 -0400229 }
Mike Klein7542ab52020-04-02 08:50:16 -0500230
Mike Kleinb7d87902020-04-02 10:14:35 -0500231 skvm::Usage usage{b.program()};
Mike Klein7542ab52020-04-02 08:50:16 -0500232 REPORTER_ASSERT(r, b.program()[0].op == skvm::Op::load32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500233 REPORTER_ASSERT(r, usage[0].size() == 2);
Mike Klein7542ab52020-04-02 08:50:16 -0500234 REPORTER_ASSERT(r, b.program()[1].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500235 REPORTER_ASSERT(r, usage[1].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500236 REPORTER_ASSERT(r, b.program()[2].op == skvm::Op::splat);
Mike Kleinb7d87902020-04-02 10:14:35 -0500237 REPORTER_ASSERT(r, usage[2].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500238 REPORTER_ASSERT(r, b.program()[3].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500239 REPORTER_ASSERT(r, usage[3].size() == 1);
Herb Derbyf20400e2020-03-18 16:11:25 -0400240}
241
Mike Klein9fdadb92019-07-30 12:30:13 -0500242DEF_TEST(SkVM_Pointless, r) {
243 // Let's build a program with no memory arguments.
244 // It should all be pegged as dead code, but we should be able to "run" it.
245 skvm::Builder b;
246 {
247 b.add(b.splat(5.0f),
248 b.splat(4.0f));
249 }
250
Mike Klein10fc1e62020-04-13 11:57:05 -0500251 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500252 for (int N = 0; N < 64; N++) {
253 program.eval(N);
254 }
255 });
256
Mike Kleined9b1f12020-02-06 13:02:32 -0600257 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500258 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500259 }
260}
261
Mike Klein10fc1e62020-04-13 11:57:05 -0500262DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600263 skvm::Builder b;
264 b.store32(b.varying<int>(), b.splat(42));
265
Mike Klein10fc1e62020-04-13 11:57:05 -0500266 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
267 int buf[18];
268 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -0600269
Mike Klein10fc1e62020-04-13 11:57:05 -0500270 p.eval(17, buf);
271 for (int i = 0; i < 17; i++) {
272 REPORTER_ASSERT(r, buf[i] == 42);
273 }
274 REPORTER_ASSERT(r, buf[17] == 47);
275 });
Mike Kleinb6149312020-02-26 13:04:23 -0600276}
Mike Klein11efa182020-02-27 12:04:37 -0600277
Mike Klein10fc1e62020-04-13 11:57:05 -0500278DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -0600279 skvm::Builder b;
280 {
281 auto src = b.varying<int>(),
282 dst = b.varying<int>();
283 b.store32(dst, b.load32(src));
284 }
285
Mike Klein10fc1e62020-04-13 11:57:05 -0500286 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
287 int src[] = {1,2,3,4,5,6,7,8,9},
288 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -0600289
Mike Klein10fc1e62020-04-13 11:57:05 -0500290 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
291 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
292 REPORTER_ASSERT(r, dst[i] == src[i]);
293 }
294 size_t i = SK_ARRAY_COUNT(src)-1;
295 REPORTER_ASSERT(r, dst[i] == 0);
296 });
Mike Klein11efa182020-02-27 12:04:37 -0600297}
Mike Kleinb6149312020-02-26 13:04:23 -0600298
Mike Klein81756e42019-06-12 11:36:28 -0500299DEF_TEST(SkVM_LoopCounts, r) {
300 // Make sure we cover all the exact N we want.
301
Mike Klein9977efa2019-07-15 12:22:36 -0500302 // buf[i] += 1
303 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500304 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500305 b.store32(arg,
306 b.add(b.splat(1),
307 b.load32(arg)));
308
Mike Klein10fc1e62020-04-13 11:57:05 -0500309 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500310 int buf[64];
311 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500312 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
313 buf[i] = i;
314 }
315 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500316
Mike Klein9977efa2019-07-15 12:22:36 -0500317 for (int i = 0; i < N; i++) {
318 REPORTER_ASSERT(r, buf[i] == i+1);
319 }
320 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
321 REPORTER_ASSERT(r, buf[i] == i);
322 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500323 }
324 });
Mike Klein81756e42019-06-12 11:36:28 -0500325}
Mike Klein05642042019-06-18 12:16:06 -0500326
Mike Kleinb2b6a992020-01-13 16:34:30 -0600327DEF_TEST(SkVM_gather32, r) {
328 skvm::Builder b;
329 {
330 skvm::Arg uniforms = b.uniform(),
331 buf = b.varying<int>();
332 skvm::I32 x = b.load32(buf);
333 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
334 }
335
Mike Klein10fc1e62020-04-13 11:57:05 -0500336 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600337 const int img[] = {12,34,56,78, 90,98,76,54};
338
339 int buf[20];
340 for (int i = 0; i < 20; i++) {
341 buf[i] = i;
342 }
343
344 struct Uniforms {
345 const int* img;
346 } uniforms{img};
347
348 program.eval(20, &uniforms, buf);
349 int i = 0;
350 REPORTER_ASSERT(r, buf[i] == 12); i++;
351 REPORTER_ASSERT(r, buf[i] == 34); i++;
352 REPORTER_ASSERT(r, buf[i] == 56); i++;
353 REPORTER_ASSERT(r, buf[i] == 78); i++;
354 REPORTER_ASSERT(r, buf[i] == 90); i++;
355 REPORTER_ASSERT(r, buf[i] == 98); i++;
356 REPORTER_ASSERT(r, buf[i] == 76); i++;
357 REPORTER_ASSERT(r, buf[i] == 54); i++;
358
359 REPORTER_ASSERT(r, buf[i] == 12); i++;
360 REPORTER_ASSERT(r, buf[i] == 34); i++;
361 REPORTER_ASSERT(r, buf[i] == 56); i++;
362 REPORTER_ASSERT(r, buf[i] == 78); i++;
363 REPORTER_ASSERT(r, buf[i] == 90); i++;
364 REPORTER_ASSERT(r, buf[i] == 98); i++;
365 REPORTER_ASSERT(r, buf[i] == 76); i++;
366 REPORTER_ASSERT(r, buf[i] == 54); i++;
367
368 REPORTER_ASSERT(r, buf[i] == 12); i++;
369 REPORTER_ASSERT(r, buf[i] == 34); i++;
370 REPORTER_ASSERT(r, buf[i] == 56); i++;
371 REPORTER_ASSERT(r, buf[i] == 78); i++;
372 });
373}
374
Mike Klein81d52672019-07-30 11:11:09 -0500375DEF_TEST(SkVM_gathers, r) {
376 skvm::Builder b;
377 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600378 skvm::Arg uniforms = b.uniform(),
379 buf32 = b.varying<int>(),
380 buf16 = b.varying<uint16_t>(),
381 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500382
383 skvm::I32 x = b.load32(buf32);
384
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600385 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
386 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
387 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500388 }
389
Mike Klein10fc1e62020-04-13 11:57:05 -0500390 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500391 const int img[] = {12,34,56,78, 90,98,76,54};
392
393 constexpr int N = 20;
394 int buf32[N];
395 uint16_t buf16[N];
396 uint8_t buf8 [N];
397
398 for (int i = 0; i < 20; i++) {
399 buf32[i] = i;
400 }
401
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600402 struct Uniforms {
403 const int* img;
404 } uniforms{img};
405
406 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500407 int i = 0;
408 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
409 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
410 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
411 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
412 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
413 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
414 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
415 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
416
417 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
418 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
419 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
420 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
421 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
422 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
423 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
424 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
425
426 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
427 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
428 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
429 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
430 });
431}
432
Mike Klein21e85eb2020-04-17 13:57:13 -0500433DEF_TEST(SkVM_gathers2, r) {
434 skvm::Builder b;
435 {
436 skvm::Arg uniforms = b.uniform(),
437 buf32 = b.varying<int>(),
438 buf16 = b.varying<uint16_t>(),
439 buf8 = b.varying<uint8_t>();
440
441 skvm::I32 x = b.load32(buf32);
442
443 b.store32(buf32, b.gather32(uniforms,0, x));
444 b.store16(buf16, b.gather16(uniforms,0, x));
445 b.store8 (buf8 , b.gather8 (uniforms,0, x));
446 }
447
448 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
449 uint8_t img[256];
450 for (int i = 0; i < 256; i++) {
451 img[i] = i;
452 }
453
454 int buf32[64];
455 uint16_t buf16[64];
456 uint8_t buf8 [64];
457
458 for (int i = 0; i < 64; i++) {
459 buf32[i] = (i*47)&63;
460 buf16[i] = 0;
461 buf8 [i] = 0;
462 }
463
464 struct Uniforms {
465 const uint8_t* img;
466 } uniforms{img};
467
468 program.eval(64, &uniforms, buf32, buf16, buf8);
469
470 for (int i = 0; i < 64; i++) {
471 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
472 }
473
474 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
475 REPORTER_ASSERT(r, buf16[63] == 0x2322);
476
477 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
478 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
479 });
480}
481
Mike Klein81d52672019-07-30 11:11:09 -0500482DEF_TEST(SkVM_bitops, r) {
483 skvm::Builder b;
484 {
485 skvm::Arg ptr = b.varying<int>();
486
487 skvm::I32 x = b.load32(ptr);
488
Mike Klein4067a942020-04-05 10:25:32 -0500489 x = b.bit_and (x, b.splat(0xf1)); // 0x40
490 x = b.bit_or (x, b.splat(0x80)); // 0xc0
491 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
492 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500493
494 x = b.shl(x, 28); // 0xe000'0000
495 x = b.sra(x, 28); // 0xffff'fffe
496 x = b.shr(x, 1); // 0x7fff'ffff
497
498 b.store32(ptr, x);
499 }
500
Mike Klein10fc1e62020-04-13 11:57:05 -0500501 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500502 int x = 0x42;
503 program.eval(1, &x);
504 REPORTER_ASSERT(r, x == 0x7fff'ffff);
505 });
506}
507
Mike Klein4067a942020-04-05 10:25:32 -0500508DEF_TEST(SkVM_select_is_NaN, r) {
509 skvm::Builder b;
510 {
511 skvm::Arg src = b.varying<float>(),
512 dst = b.varying<float>();
513
514 skvm::F32 x = b.loadF(src);
515 x = select(is_NaN(x), b.splat(0.0f)
516 , x);
517 b.storeF(dst, x);
518 }
519
520 std::vector<skvm::OptimizedInstruction> program = b.optimize();
521 REPORTER_ASSERT(r, program.size() == 4);
522 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
523 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
524 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
525 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
526
Mike Klein10fc1e62020-04-13 11:57:05 -0500527 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500528 // ±NaN, ±0, ±1, ±inf
529 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
530 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
531 uint32_t dst[SK_ARRAY_COUNT(src)];
532 program.eval(SK_ARRAY_COUNT(src), src, dst);
533
534 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
535 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
536 }
537 });
538}
539
Mike Klein81d52672019-07-30 11:11:09 -0500540DEF_TEST(SkVM_f32, r) {
541 skvm::Builder b;
542 {
543 skvm::Arg arg = b.varying<float>();
544
Mike Reedf5ff4c22020-03-23 14:57:53 -0400545 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500546 y = b.add(x,x), // y = 2x
547 z = b.sub(y,x), // z = 2x-x = x
548 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400549 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500550 }
551
Mike Klein10fc1e62020-04-13 11:57:05 -0500552 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500553 float buf[] = { 1,2,3,4,5,6,7,8,9 };
554 program.eval(SK_ARRAY_COUNT(buf), buf);
555 for (float v : buf) {
556 REPORTER_ASSERT(r, v == 1.0f);
557 }
558 });
559}
560
561DEF_TEST(SkVM_cmp_i32, r) {
562 skvm::Builder b;
563 {
564 skvm::I32 x = b.load32(b.varying<int>());
565
566 auto to_bit = [&](int shift, skvm::I32 mask) {
567 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
568 };
569
570 skvm::I32 m = b.splat(0);
571 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
572 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
573 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
574 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
575 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
576 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
577
578 b.store32(b.varying<int>(), m);
579 }
Mike Klein10fc1e62020-04-13 11:57:05 -0500580 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500581 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
582 int out[SK_ARRAY_COUNT(in)];
583
584 program.eval(SK_ARRAY_COUNT(in), in, out);
585
586 REPORTER_ASSERT(r, out[0] == 0b001111);
587 REPORTER_ASSERT(r, out[1] == 0b001100);
588 REPORTER_ASSERT(r, out[2] == 0b001010);
589 REPORTER_ASSERT(r, out[3] == 0b001010);
590 REPORTER_ASSERT(r, out[4] == 0b000010);
591 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
592 REPORTER_ASSERT(r, out[i] == 0b110010);
593 }
594 });
595}
596
597DEF_TEST(SkVM_cmp_f32, r) {
598 skvm::Builder b;
599 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400600 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500601
602 auto to_bit = [&](int shift, skvm::I32 mask) {
603 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
604 };
605
606 skvm::I32 m = b.splat(0);
607 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
608 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
609 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
610 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
611 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
612 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
613
614 b.store32(b.varying<int>(), m);
615 }
616
Mike Klein10fc1e62020-04-13 11:57:05 -0500617 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500618 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
619 int out[SK_ARRAY_COUNT(in)];
620
621 program.eval(SK_ARRAY_COUNT(in), in, out);
622
623 REPORTER_ASSERT(r, out[0] == 0b001111);
624 REPORTER_ASSERT(r, out[1] == 0b001100);
625 REPORTER_ASSERT(r, out[2] == 0b001010);
626 REPORTER_ASSERT(r, out[3] == 0b001010);
627 REPORTER_ASSERT(r, out[4] == 0b000010);
628 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
629 REPORTER_ASSERT(r, out[i] == 0b110010);
630 }
631 });
632}
633
Mike Klein14548b92020-02-28 14:02:29 -0600634DEF_TEST(SkVM_index, r) {
635 skvm::Builder b;
636 b.store32(b.varying<int>(), b.index());
637
Mike Klein10fc1e62020-04-13 11:57:05 -0500638 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600639 int buf[23];
640 program.eval(SK_ARRAY_COUNT(buf), buf);
641 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
642 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
643 }
644 });
645}
646
Mike Klein4a131192019-07-19 13:56:41 -0500647DEF_TEST(SkVM_mad, r) {
648 // This program is designed to exercise the tricky corners of instruction
649 // and register selection for Op::mad_f32.
650
651 skvm::Builder b;
652 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500653 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500654
655 skvm::F32 x = b.to_f32(b.load32(arg)),
656 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
657 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
658 w = b.mad(z,z,y), // w can alias z but not y.
659 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600660 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500661 }
662
Mike Klein10fc1e62020-04-13 11:57:05 -0500663 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500664 int x = 2;
665 program.eval(1, &x);
666 // x = 2
667 // y = 2*2 + 2 = 6
668 // z = 6*6 + 2 = 38
669 // w = 38*38 + 6 = 1450
670 // v = 1450*6 + 1450 = 10150
671 REPORTER_ASSERT(r, x == 10150);
672 });
673}
674
Mike Klein7c0332c2020-03-05 14:18:04 -0600675DEF_TEST(SkVM_fms, r) {
676 // Create a pattern that can be peepholed into an Op::fms_f32.
677 skvm::Builder b;
678 {
679 skvm::Arg arg = b.varying<int>();
680
681 skvm::F32 x = b.to_f32(b.load32(arg)),
682 v = b.sub(b.mul(x, b.splat(2.0f)),
683 b.splat(1.0f));
684 b.store32(arg, b.trunc(v));
685 }
686
Mike Klein10fc1e62020-04-13 11:57:05 -0500687 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600688 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
689 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
690
691 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
692 REPORTER_ASSERT(r, buf[i] = 2*i-1);
693 }
694 });
695}
696
697DEF_TEST(SkVM_fnma, r) {
698 // Create a pattern that can be peepholed into an Op::fnma_f32.
699 skvm::Builder b;
700 {
701 skvm::Arg arg = b.varying<int>();
702
703 skvm::F32 x = b.to_f32(b.load32(arg)),
704 v = b.sub(b.splat(1.0f),
705 b.mul(x, b.splat(2.0f)));
706 b.store32(arg, b.trunc(v));
707 }
708
Mike Klein10fc1e62020-04-13 11:57:05 -0500709 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600710 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
711 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
712
713 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
714 REPORTER_ASSERT(r, buf[i] = 1-2*i);
715 }
716 });
717}
718
Mike Klein81d52672019-07-30 11:11:09 -0500719DEF_TEST(SkVM_madder, r) {
720 skvm::Builder b;
721 {
722 skvm::Arg arg = b.varying<float>();
723
Mike Reedf5ff4c22020-03-23 14:57:53 -0400724 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500725 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
726 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
727 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400728 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500729 }
730
Mike Klein10fc1e62020-04-13 11:57:05 -0500731 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500732 float x = 2.0f;
733 // y = 2*2 + 2 = 6
734 // z = 6*2 + 6 = 18
735 // w = 6*6 + 18 = 54
736 program.eval(1, &x);
737 REPORTER_ASSERT(r, x == 54.0f);
738 });
739}
740
Mike Kleinf22faaf2020-01-09 07:27:39 -0600741DEF_TEST(SkVM_floor, r) {
742 skvm::Builder b;
743 {
744 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400745 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600746 }
747
Mike Klein10fc1e62020-04-13 11:57:05 -0500748 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600749 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
750 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
751 program.eval(SK_ARRAY_COUNT(buf), buf);
752 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
753 REPORTER_ASSERT(r, buf[i] == want[i]);
754 }
755 });
756}
757
Mike Klein5caf7de2020-03-12 11:05:46 -0500758DEF_TEST(SkVM_round, r) {
759 skvm::Builder b;
760 {
761 skvm::Arg src = b.varying<float>();
762 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400763 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500764 }
765
766 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
767 // We haven't explicitly guaranteed that here... it just probably is.
Mike Klein10fc1e62020-04-13 11:57:05 -0500768 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500769 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
770 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
771 int dst[SK_ARRAY_COUNT(buf)];
772
773 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
774 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
775 REPORTER_ASSERT(r, dst[i] == want[i]);
776 }
777 });
778}
779
Herb Derbyc02a41f2020-02-28 14:25:45 -0600780DEF_TEST(SkVM_min, r) {
781 skvm::Builder b;
782 {
783 skvm::Arg src1 = b.varying<float>();
784 skvm::Arg src2 = b.varying<float>();
785 skvm::Arg dst = b.varying<float>();
786
Mike Reedf5ff4c22020-03-23 14:57:53 -0400787 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600788 }
789
Mike Klein10fc1e62020-04-13 11:57:05 -0500790 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600791 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
792 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
793 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
794 float d[SK_ARRAY_COUNT(s1)];
795 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
796 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
797 REPORTER_ASSERT(r, d[i] == want[i]);
798 }
799 });
800}
801
802DEF_TEST(SkVM_max, r) {
803 skvm::Builder b;
804 {
805 skvm::Arg src1 = b.varying<float>();
806 skvm::Arg src2 = b.varying<float>();
807 skvm::Arg dst = b.varying<float>();
808
Mike Reedf5ff4c22020-03-23 14:57:53 -0400809 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600810 }
811
Mike Klein10fc1e62020-04-13 11:57:05 -0500812 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600813 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
814 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
815 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
816 float d[SK_ARRAY_COUNT(s1)];
817 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
818 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
819 REPORTER_ASSERT(r, d[i] == want[i]);
820 }
821 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600822}
823
Mike Kleinf98d0d32019-07-22 14:30:18 -0500824DEF_TEST(SkVM_hoist, r) {
825 // This program uses enough constants that it will fail to JIT if we hoist them.
826 // The JIT will try again without hoisting, and that'll just need 2 registers.
827 skvm::Builder b;
828 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500829 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500830 skvm::I32 x = b.load32(arg);
831 for (int i = 0; i < 32; i++) {
832 x = b.add(x, b.splat(i));
833 }
834 b.store32(arg, x);
835 }
836
Mike Klein10fc1e62020-04-13 11:57:05 -0500837 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500838 int x = 4;
839 program.eval(1, &x);
840 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
841 // x += 496
842 REPORTER_ASSERT(r, x == 500);
843 });
844}
845
Mike Kleinb9944122019-08-02 12:22:39 -0500846DEF_TEST(SkVM_select, r) {
847 skvm::Builder b;
848 {
849 skvm::Arg buf = b.varying<int>();
850
851 skvm::I32 x = b.load32(buf);
852
853 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
854
855 b.store32(buf, x);
856 }
857
Mike Klein10fc1e62020-04-13 11:57:05 -0500858 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500859 int buf[] = { 0,1,2,3,4,5,6,7,8 };
860 program.eval(SK_ARRAY_COUNT(buf), buf);
861 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
862 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
863 }
864 });
865}
866
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500867DEF_TEST(SkVM_NewOps, r) {
868 // Exercise a somewhat arbitrary set of new ops.
869 skvm::Builder b;
870 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500871 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500872 uniforms = b.uniform();
873
874 skvm::I32 x = b.load16(buf);
875
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600876 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500877
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600878 x = b.add(x, b.uniform32(uniforms, kPtr+0));
879 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
880 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
881
882 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500883 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
884 x = b.select(b.gt(x, limit ), limit , x);
885
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600886 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500887
888 b.store16(buf, x);
889 }
890
891 if ((false)) {
892 SkDynamicMemoryWStream buf;
893 dump(b, &buf);
894 sk_sp<SkData> blob = buf.detachAsData();
895 SkDebugf("%.*s\n", blob->size(), blob->data());
896 }
897
Mike Klein10fc1e62020-04-13 11:57:05 -0500898 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500899 const int N = 31;
900 int16_t buf[N];
901 for (int i = 0; i < N; i++) {
902 buf[i] = i;
903 }
904
905 const int M = 16;
906 uint8_t img[M];
907 for (int i = 0; i < M; i++) {
908 img[i] = i*i;
909 }
910
911 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600912 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500913 int add = 5;
914 uint8_t mul = 3;
915 uint16_t sub = 18;
916 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600917 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500918
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600919 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500920
921 for (int i = 0; i < N; i++) {
922 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
923 int x = 3*(i-1);
924
925 // Then that's pinned to the limits of img.
926 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
927 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
928 REPORTER_ASSERT(r, buf[i] == img[x]);
929 }
930 });
931}
932
Mike Klein5a8404c2020-02-28 14:24:56 -0600933DEF_TEST(SkVM_sqrt, r) {
934 skvm::Builder b;
935 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400936 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600937
Mike Klein10fc1e62020-04-13 11:57:05 -0500938 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600939 constexpr int K = 17;
940 float buf[K];
941 for (int i = 0; i < K; i++) {
942 buf[i] = (float)(i*i);
943 }
944
945 // x^2 -> x
946 program.eval(K, buf);
947
948 for (int i = 0; i < K; i++) {
949 REPORTER_ASSERT(r, buf[i] == (float)i);
950 }
951 });
952}
953
Mike Klein3f7c8652019-11-07 10:33:56 -0600954DEF_TEST(SkVM_MSAN, r) {
955 // This little memset32() program should be able to JIT, but if we run that
956 // JIT code in an MSAN build, it won't see the writes initialize buf. So
957 // this tests that we're using the interpreter instead.
958 skvm::Builder b;
959 b.store32(b.varying<int>(), b.splat(42));
960
Mike Klein10fc1e62020-04-13 11:57:05 -0500961 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600962 constexpr int K = 17;
963 int buf[K]; // Intentionally uninitialized.
964 program.eval(K, buf);
965 sk_msan_assert_initialized(buf, buf+K);
966 for (int x : buf) {
967 REPORTER_ASSERT(r, x == 42);
968 }
969 });
970}
971
Mike Klein13601172019-11-08 15:01:02 -0600972DEF_TEST(SkVM_assert, r) {
973 skvm::Builder b;
974 b.assert_true(b.lt(b.load32(b.varying<int>()),
975 b.splat(42)));
976
Mike Klein10fc1e62020-04-13 11:57:05 -0500977 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600978 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600979 program.eval(SK_ARRAY_COUNT(buf), buf);
980 });
981}
982
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600983DEF_TEST(SkVM_premul, reporter) {
984 // Test that premul is short-circuited when alpha is known opaque.
985 {
986 skvm::Builder p;
987 auto rptr = p.varying<int>(),
988 aptr = p.varying<int>();
989
Mike Reedf5ff4c22020-03-23 14:57:53 -0400990 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600991 g = p.splat(0.0f),
992 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400993 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600994
995 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400996 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600997
998 // load red, load alpha, red *= alpha, store red
999 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
1000 }
1001
1002 {
1003 skvm::Builder p;
1004 auto rptr = p.varying<int>();
1005
Mike Reedf5ff4c22020-03-23 14:57:53 -04001006 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001007 g = p.splat(0.0f),
1008 b = p.splat(0.0f),
1009 a = p.splat(1.0f);
1010
1011 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001012 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001013
1014 // load red, store red
1015 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1016 }
1017
1018 // Same deal for unpremul.
1019 {
1020 skvm::Builder p;
1021 auto rptr = p.varying<int>(),
1022 aptr = p.varying<int>();
1023
Mike Reedf5ff4c22020-03-23 14:57:53 -04001024 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001025 g = p.splat(0.0f),
1026 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001027 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001028
1029 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001030 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001031
1032 // load red, load alpha, a bunch of unpremul instructions, store red
1033 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1034 }
1035
1036 {
1037 skvm::Builder p;
1038 auto rptr = p.varying<int>();
1039
Mike Reedf5ff4c22020-03-23 14:57:53 -04001040 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001041 g = p.splat(0.0f),
1042 b = p.splat(0.0f),
1043 a = p.splat(1.0f);
1044
1045 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001046 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001047
1048 // load red, store red
1049 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1050 }
1051}
Mike Klein05642042019-06-18 12:16:06 -05001052
Mike Klein05642042019-06-18 12:16:06 -05001053template <typename Fn>
1054static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001055 uint8_t buf[4096];
1056 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001057 fn(a);
1058
1059 REPORTER_ASSERT(r, a.size() == expected.size());
1060
Mike Klein88c0a902019-06-24 15:34:02 -04001061 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001062 want = expected.begin();
1063 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001064 REPORTER_ASSERT(r, got[i] == want[i],
1065 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001066 }
1067}
1068
1069DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001070 // Easiest way to generate test cases is
1071 //
1072 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1073 //
1074 // The -x86-asm-syntax=intel bit is optional, controlling the
1075 // input syntax only; the output will always be AT&T op x,y,dst style.
1076 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1077 // that a bit easier to use here, despite maybe favoring AT&T overall.
1078
1079 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001080 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001081 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001082 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001083 a.vzeroupper();
1084 a.ret();
1085 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001086 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001087 0xc5, 0xf8, 0x77,
1088 0xc3,
1089 });
1090
Mike Klein237dbb42019-07-19 09:44:47 -05001091 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001092 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001093 a.ret();
1094 a.align(4);
1095 },{
1096 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001097 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001098 });
Mike Klein61703a62019-06-18 15:01:12 -05001099
Mike Klein397fc882019-06-20 11:37:10 -05001100 test_asm(r, [&](A& a) {
1101 a.add(A::rax, 8); // Always good to test rax.
1102 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001103
Mike Klein397fc882019-06-20 11:37:10 -05001104 a.add(A::rdi, 12); // Last 0x48 REX
1105 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001106
Mike Klein86a645c2019-07-12 12:29:39 -05001107 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001108 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001109
Mike Klein397fc882019-06-20 11:37:10 -05001110 a.add(A::rsi, 128); // Requires 4 byte immediate.
1111 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001112
1113 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1114 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1115 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
1116 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
1117 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1118 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1119 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1120
1121 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1122
1123 a.add( A::rax , A::rcx); // addq %rcx, %rax
1124 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1125 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1126 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1127
1128 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001129 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001130 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001131 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001132
1133 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001134 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001135
Mike Klein86a645c2019-07-12 12:29:39 -05001136 0x49, 0x83, 0b11'000'000, 0x07,
1137 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001138
1139 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001140 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001141
1142 0x48,0x83,0x06,0x07,
1143 0x48,0x83,0x46,0x0c,0x07,
1144 0x48,0x83,0x44,0x24,0x0c,0x07,
1145 0x48,0x83,0x44,0x84,0x0c,0x07,
1146 0x4b,0x83,0x44,0x43,0x0c,0x07,
1147 0x49,0x83,0x44,0x03,0x0c,0x07,
1148 0x4a,0x83,0x44,0x18,0x0c,0x07,
1149
1150 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1151
1152 0x48,0x01,0xc8,
1153 0x48,0x01,0x08,
1154 0x48,0x01,0x48,0x0c,
1155 0x48,0x03,0x48,0x0c,
1156 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001157 });
Mike Klein397fc882019-06-20 11:37:10 -05001158
1159
1160 test_asm(r, [&](A& a) {
1161 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1162 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1163 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1164 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1165 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1166 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1167 },{
1168 /* VEX */ /*op*/ /*modRM*/
1169 0xc5, 0xf5, 0xfe, 0xc2,
1170 0xc5, 0x75, 0xfe, 0xc2,
1171 0xc5, 0xbd, 0xfe, 0xc2,
1172 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1173 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1174 0xc5, 0xf5, 0xfa, 0xc2,
1175 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001176
1177 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001178 A::Label l;
1179 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001180 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1181 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1182 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1183 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1184 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1185 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001186 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001187 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001188 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001189 0xc5,0xf5,0x76,0xc2,
1190 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001191 0xc5,0xf4,0xc2,0xc2,0x00,
1192 0xc5,0xf4,0xc2,0xc2,0x01,
1193 0xc5,0xf4,0xc2,0xc2,0x02,
1194 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001195 });
1196
1197 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001198 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1199 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1200 },{
1201 0xc5,0xf4,0x5d,0xc2,
1202 0xc5,0xf4,0x5f,0xc2,
1203 });
1204
1205 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001206 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1207 },{
1208 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1209 });
1210
1211 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001212 a.vpsrld(A::ymm15, A::ymm2, 8);
1213 a.vpsrld(A::ymm0 , A::ymm8, 5);
1214 },{
1215 0xc5, 0x85, 0x72,0xd2, 0x08,
1216 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1217 });
1218
1219 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001220 A::Label l;
Mike Klein184f6012020-07-22 13:17:29 -05001221 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
Mike Kleind8194dc2020-07-22 10:42:11 -05001222 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
Mike Kleinff0ae812019-06-20 15:03:44 -05001223 a.vpermq(A::ymm1, A::ymm2, 5);
Mike Kleind8194dc2020-07-22 10:42:11 -05001224 a.label(&l); // 6 bytes after vperm2f128
Mike Kleinff0ae812019-06-20 15:03:44 -05001225 },{
Mike Klein184f6012020-07-22 13:17:29 -05001226 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
Mike Kleind8194dc2020-07-22 10:42:11 -05001227 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
Mike Kleinff0ae812019-06-20 15:03:44 -05001228 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1229 });
Mike Kleine5053412019-06-21 12:37:22 -05001230
1231 test_asm(r, [&](A& a) {
Mike Kleind8194dc2020-07-22 10:42:11 -05001232 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1233 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1234 },{
1235 0xc5,0xed,0x62,0x0f,
1236 0xc5,0xed,0x6a,0xcb,
1237 });
1238
1239 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001240 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1241 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1242 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1243 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1244 },{
1245 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1246 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1247 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1248 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1249 });
1250
1251 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001252 A::Label l;
1253 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001254 a.byte(1);
1255 a.byte(2);
1256 a.byte(3);
1257 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001258
Mike Klein65c10b52019-07-12 09:22:21 -05001259 a.vbroadcastss(A::ymm0 , &l);
1260 a.vbroadcastss(A::ymm1 , &l);
1261 a.vbroadcastss(A::ymm8 , &l);
1262 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001263
Mike Klein65c10b52019-07-12 09:22:21 -05001264 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001265 a.vpaddd (A::ymm4, A::ymm3, &l);
1266 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001267
1268 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001269
1270 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001271 },{
1272 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001273
Mike Kleine5053412019-06-21 12:37:22 -05001274 /* VEX */ /*op*/ /* ModRM */ /* offset */
1275 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1276 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1277 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1278 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001279
1280 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001281
1282 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1283 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001284
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001285 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1286
 1287 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001288 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001289
1290 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001291 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1292 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1293 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1294 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001295
1296 a.vbroadcastss(A::ymm8, A::xmm0);
1297 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001298 },{
1299 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1300 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1301 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1302 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1303 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001304
1305 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1306 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001307 });
1308
1309 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001310 A::Label l;
1311 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001312 a.jne(&l);
1313 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001314 a.je (&l);
1315 a.jmp(&l);
1316 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001317 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001318
Mike Kleinc15c9362020-04-16 11:10:36 -05001319 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001320 a.cmp(A::rax, 12);
1321 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001322 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001323 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1324 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1325 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1326 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1327 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001328 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001329
Mike Kleinc15c9362020-04-16 11:10:36 -05001330 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001331 0x48,0x83,0xf8,0x0c,
1332 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001333 });
Mike Klein120d9e82019-06-21 15:52:55 -05001334
1335 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001336 a.vmovups(A::ymm5, A::Mem{A::rsi});
1337 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001338
Mike Klein400ba222020-06-30 15:54:19 -05001339 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001340 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001341
Mike Kleinedc2dac2020-04-15 16:18:27 -05001342 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1343 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001344
Mike Klein8390f2e2020-04-15 17:03:08 -05001345 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001346 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001347 /* VEX */ /*Op*/ /* ModRM */
1348 0xc5, 0xfc, 0x10, 0b00'101'110,
1349 0xc5, 0xfc, 0x11, 0b00'101'110,
1350
Mike Klein400ba222020-06-30 15:54:19 -05001351 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001352 0xc5, 0xf8, 0x11, 0b00'101'110,
1353
Mike Klein52010b72019-08-02 11:18:00 -05001354 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001355 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001356
1357 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001358 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001359
1360 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001361 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1362 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1363 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001364
Mike Kleinedc2dac2020-04-15 16:18:27 -05001365 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1366 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1367 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001368 },{
1369 0xc5,0xfc,0x10,0x2c,0x24,
1370 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1371 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1372
1373 0xc5,0xfc,0x11,0x2c,0x24,
1374 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1375 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1376 });
1377
1378 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001379 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1380 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1381 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1382 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1383 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001384
Mike Kleinc15c9362020-04-16 11:10:36 -05001385 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1386 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1387 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1388 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1389 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001390
Mike Klein8390f2e2020-04-15 17:03:08 -05001391 a.vmovd(A::Mem{A::rax}, A::xmm0);
1392 a.vmovd(A::Mem{A::rax}, A::xmm8);
1393 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1394
1395 a.vmovd(A::xmm0, A::Mem{A::rax});
1396 a.vmovd(A::xmm8, A::Mem{A::rax});
1397 a.vmovd(A::xmm0, A::Mem{A::r8 });
1398
1399 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1400 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1401 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1402
Mike Klein35b97c32019-07-12 12:32:45 -05001403 a.vmovd(A::rax, A::xmm0);
1404 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001405 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001406
1407 a.vmovd(A::xmm0, A::rax);
1408 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001409 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001410
Mike Kleinc15c9362020-04-16 11:10:36 -05001411 a.movb(A::Mem{A::rdx}, A::rax);
1412 a.movb(A::Mem{A::rdx}, A::r8 );
1413 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001414
Mike Kleinc15c9362020-04-16 11:10:36 -05001415 a.movb(A::rdx, A::Mem{A::rax});
1416 a.movb(A::rdx, A::Mem{A::r8 });
1417 a.movb(A::r8 , A::Mem{A::rax});
1418
1419 a.movb(A::rdx, 12);
1420 a.movb(A::rax, 4);
1421 a.movb(A::r8 , -1);
1422
1423 a.movb(A::Mem{A::rdx}, 12);
1424 a.movb(A::Mem{A::rax}, 4);
1425 a.movb(A::Mem{A::r8 }, -1);
1426 },{
1427 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1428 0x49,0x0f,0xb6,0x00,
1429 0x4c,0x0f,0xb6,0x06,
1430 0x4c,0x0f,0xb6,0x46, 12,
1431 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1432
1433 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1434 0x49,0x0f,0xb7,0x00,
1435 0x4c,0x0f,0xb7,0x06,
1436 0x4c,0x0f,0xb7,0x46, 12,
1437 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001438
Mike Klein35b97c32019-07-12 12:32:45 -05001439 0xc5,0xf9,0x7e,0x00,
1440 0xc5,0x79,0x7e,0x00,
1441 0xc4,0xc1,0x79,0x7e,0x00,
1442
1443 0xc5,0xf9,0x6e,0x00,
1444 0xc5,0x79,0x6e,0x00,
1445 0xc4,0xc1,0x79,0x6e,0x00,
1446
Mike Klein93d3fab2020-01-14 10:46:44 -06001447 0xc5,0xf9,0x6e,0x04,0x88,
1448 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1449 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1450
Mike Klein35b97c32019-07-12 12:32:45 -05001451 0xc5,0xf9,0x7e,0xc0,
1452 0xc5,0x79,0x7e,0xc0,
1453 0xc4,0xc1,0x79,0x7e,0xc0,
1454
1455 0xc5,0xf9,0x6e,0xc0,
1456 0xc5,0x79,0x6e,0xc0,
1457 0xc4,0xc1,0x79,0x6e,0xc0,
1458
Mike Kleinc15c9362020-04-16 11:10:36 -05001459 0x48 ,0x88, 0x02,
1460 0x4c, 0x88, 0x02,
1461 0x49, 0x88, 0x00,
1462
1463 0x48 ,0x8a, 0x10,
1464 0x49, 0x8a, 0x10,
1465 0x4c, 0x8a, 0x00,
1466
1467 0x48, 0xc6, 0xc2, 0x0c,
1468 0x48, 0xc6, 0xc0, 0x04,
1469 0x49, 0xc6, 0xc0, 0xff,
1470
1471 0x48, 0xc6, 0x02, 0x0c,
1472 0x48, 0xc6, 0x00, 0x04,
1473 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001474 });
1475
1476 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001477 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
 1478 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001479
Mike Klein8390f2e2020-04-15 17:03:08 -05001480 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
 1481 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001482
Mike Klein21e85eb2020-04-17 13:57:13 -05001483 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1484 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1485
1486 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1487 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1488
Mike Klein8390f2e2020-04-15 17:03:08 -05001489 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1490 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001491
Mike Klein8390f2e2020-04-15 17:03:08 -05001492 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1493 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001494 },{
Mike Klein52010b72019-08-02 11:18:00 -05001495 0xc5,0xb9, 0xc4, 0x0e, 4,
1496 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1497
Mike Klein35b97c32019-07-12 12:32:45 -05001498 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1499 0xc4,0x43,0x71, 0x20, 0x00, 12,
1500
Mike Klein21e85eb2020-04-17 13:57:13 -05001501 0xc4,0x63,0x7d,0x39,0xc1, 1,
1502 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1503
1504 0xc4,0x63,0x79,0x16,0x06, 3,
1505 0xc4,0xc3,0x79,0x16,0x08, 2,
1506
Mike Klein95529e82019-08-02 11:43:43 -05001507 0xc4,0x63,0x79, 0x15, 0x06, 7,
1508 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1509
Mike Klein35b97c32019-07-12 12:32:45 -05001510 0xc4,0x63,0x79, 0x14, 0x06, 7,
1511 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1512 });
1513
1514 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001515 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1516 },{
1517 0xc5, 0x9d, 0xdf, 0xda,
1518 });
Mike Klein9f4df802019-06-24 18:47:16 -04001519
Mike Kleind4546d62019-07-30 12:15:40 -05001520 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001521 A::Label l;
1522 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1523
1524 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1525 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1526 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1527
1528 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1529 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1530
1531 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1532 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1533 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1534 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1535 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1536
1537 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1538 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1539 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1540
Mike Kleind4546d62019-07-30 12:15:40 -05001541 a.vcvttps2dq(A::ymm3, A::ymm2);
1542 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001543 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001544 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001545 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001546 },{
1547 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001548
1549 0xc5,0xfd,0x6f,0x1e,
1550 0xc5,0xfd,0x6f,0x1c,0x24,
1551 0xc4,0xc1,0x7d,0x6f,0x1b,
1552
1553 0xc5,0xfd,0x6f,0x5e,0x04,
1554 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1555
1556 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1557 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1558 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1559 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1560 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1561
1562 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1563 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1564
1565 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1566
Mike Kleind4546d62019-07-30 12:15:40 -05001567 0xc5,0xfe,0x5b,0xda,
1568 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001569 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001570 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001571 });
1572
Mike Kleinbeaa1082020-01-13 14:04:18 -06001573 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001574 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1575 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1576
1577 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1578 a.vcvtph2ps(A::ymm2, A::xmm3);
1579 },{
1580 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1581 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1582
1583 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1584 0xc4,0xe2,0x7d,0x13,0xd3,
1585 });
1586
1587 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001588 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1589 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1590 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1591 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1592 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1593 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1594 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1595 },{
1596 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1597 0xc4,0xe2,0x75,0x92,0x04,0x10,
1598 0xc4,0x62,0x75,0x92,0x14,0x10,
1599 0xc4,0xa2,0x75,0x92,0x04,0x20,
1600 0xc4,0xc2,0x75,0x92,0x04,0x11,
1601 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1602 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1603 });
1604
Mike Kleinc322f632020-01-13 16:18:58 -06001605 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001606 a.mov(A::rax, A::Mem{A::rdi, 0});
1607 a.mov(A::rax, A::Mem{A::rdi, 1});
1608 a.mov(A::rax, A::Mem{A::rdi, 512});
1609 a.mov(A::r15, A::Mem{A::r13, 42});
1610 a.mov(A::rax, A::Mem{A::r13, 42});
1611 a.mov(A::r15, A::Mem{A::rax, 42});
1612 a.mov(A::rax, 1);
1613 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001614 },{
1615 0x48, 0x8b, 0x07,
1616 0x48, 0x8b, 0x47, 0x01,
1617 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1618 0x4d, 0x8b, 0x7d, 0x2a,
1619 0x49, 0x8b, 0x45, 0x2a,
1620 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001621 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1622 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001623 });
1624
Mike Klein9f4df802019-06-24 18:47:16 -04001625 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1626
1627 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001628 a.and16b(A::v4, A::v3, A::v1);
1629 a.orr16b(A::v4, A::v3, A::v1);
1630 a.eor16b(A::v4, A::v3, A::v1);
1631 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001632 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001633 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001634
1635 a.add4s(A::v4, A::v3, A::v1);
1636 a.sub4s(A::v4, A::v3, A::v1);
1637 a.mul4s(A::v4, A::v3, A::v1);
1638
Mike Klein97afd2e2019-10-16 14:11:27 -05001639 a.cmeq4s(A::v4, A::v3, A::v1);
1640 a.cmgt4s(A::v4, A::v3, A::v1);
1641
Mike Klein65809142019-06-25 09:44:02 -04001642 a.sub8h(A::v4, A::v3, A::v1);
1643 a.mul8h(A::v4, A::v3, A::v1);
1644
Mike Klein9f4df802019-06-24 18:47:16 -04001645 a.fadd4s(A::v4, A::v3, A::v1);
1646 a.fsub4s(A::v4, A::v3, A::v1);
1647 a.fmul4s(A::v4, A::v3, A::v1);
1648 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001649 a.fmin4s(A::v4, A::v3, A::v1);
1650 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein7c0332c2020-03-05 14:18:04 -06001651 a.fneg4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001652
Mike Klein65809142019-06-25 09:44:02 -04001653 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001654 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001655
1656 a.fcmeq4s(A::v4, A::v3, A::v1);
1657 a.fcmgt4s(A::v4, A::v3, A::v1);
1658 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001659 },{
Mike Klein65809142019-06-25 09:44:02 -04001660 0x64,0x1c,0x21,0x4e,
1661 0x64,0x1c,0xa1,0x4e,
1662 0x64,0x1c,0x21,0x6e,
1663 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001664 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001665 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001666
1667 0x64,0x84,0xa1,0x4e,
1668 0x64,0x84,0xa1,0x6e,
1669 0x64,0x9c,0xa1,0x4e,
1670
Mike Klein97afd2e2019-10-16 14:11:27 -05001671 0x64,0x8c,0xa1,0x6e,
1672 0x64,0x34,0xa1,0x4e,
1673
Mike Klein65809142019-06-25 09:44:02 -04001674 0x64,0x84,0x61,0x6e,
1675 0x64,0x9c,0x61,0x4e,
1676
Mike Klein9f4df802019-06-24 18:47:16 -04001677 0x64,0xd4,0x21,0x4e,
1678 0x64,0xd4,0xa1,0x4e,
1679 0x64,0xdc,0x21,0x6e,
1680 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001681 0x64,0xf4,0xa1,0x4e,
1682 0x64,0xf4,0x21,0x4e,
Mike Klein7c0332c2020-03-05 14:18:04 -06001683 0x64,0xf8,0xa0,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001684
Mike Klein65809142019-06-25 09:44:02 -04001685 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001686 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001687
1688 0x64,0xe4,0x21,0x4e,
1689 0x64,0xe4,0xa1,0x6e,
1690 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001691 });
1692
1693 test_asm(r, [&](A& a) {
1694 a.shl4s(A::v4, A::v3, 0);
1695 a.shl4s(A::v4, A::v3, 1);
1696 a.shl4s(A::v4, A::v3, 8);
1697 a.shl4s(A::v4, A::v3, 16);
1698 a.shl4s(A::v4, A::v3, 31);
1699
1700 a.sshr4s(A::v4, A::v3, 1);
1701 a.sshr4s(A::v4, A::v3, 8);
1702 a.sshr4s(A::v4, A::v3, 31);
1703
1704 a.ushr4s(A::v4, A::v3, 1);
1705 a.ushr4s(A::v4, A::v3, 8);
1706 a.ushr4s(A::v4, A::v3, 31);
1707
1708 a.ushr8h(A::v4, A::v3, 1);
1709 a.ushr8h(A::v4, A::v3, 8);
1710 a.ushr8h(A::v4, A::v3, 15);
1711 },{
1712 0x64,0x54,0x20,0x4f,
1713 0x64,0x54,0x21,0x4f,
1714 0x64,0x54,0x28,0x4f,
1715 0x64,0x54,0x30,0x4f,
1716 0x64,0x54,0x3f,0x4f,
1717
1718 0x64,0x04,0x3f,0x4f,
1719 0x64,0x04,0x38,0x4f,
1720 0x64,0x04,0x21,0x4f,
1721
1722 0x64,0x04,0x3f,0x6f,
1723 0x64,0x04,0x38,0x6f,
1724 0x64,0x04,0x21,0x6f,
1725
1726 0x64,0x04,0x1f,0x6f,
1727 0x64,0x04,0x18,0x6f,
1728 0x64,0x04,0x11,0x6f,
1729 });
1730
1731 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001732 a.sli4s(A::v4, A::v3, 0);
1733 a.sli4s(A::v4, A::v3, 1);
1734 a.sli4s(A::v4, A::v3, 8);
1735 a.sli4s(A::v4, A::v3, 16);
1736 a.sli4s(A::v4, A::v3, 31);
1737 },{
1738 0x64,0x54,0x20,0x6f,
1739 0x64,0x54,0x21,0x6f,
1740 0x64,0x54,0x28,0x6f,
1741 0x64,0x54,0x30,0x6f,
1742 0x64,0x54,0x3f,0x6f,
1743 });
1744
1745 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001746 a.scvtf4s (A::v4, A::v3);
1747 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001748 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001749 },{
1750 0x64,0xd8,0x21,0x4e,
1751 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001752 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001753 });
Mike Klein15a368d2019-06-26 10:21:12 -04001754
1755 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001756 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1757 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1758 a.strq(A::v1, A::sp); // str q1, [sp]
1759 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
1760 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1761 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
1762 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
1763 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1764 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001765 },{
1766 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001767 0xe0,0x07,0x80,0x3d,
1768 0xe1,0x03,0x80,0x3d,
1769 0xe0,0x1b,0x00,0xbd,
1770 0xe0,0xbf,0x00,0x3d,
1771 0xe9,0xab,0x40,0x3d,
1772 0xe7,0x2b,0x40,0xbd,
1773 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001774 0xff,0x83,0x00,0x91,
1775 });
1776
1777 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001778 a.brk(0);
1779 a.brk(65535);
1780
Mike Klein15a368d2019-06-26 10:21:12 -04001781 a.ret(A::x30); // Conventional ret using link register.
1782 a.ret(A::x13); // Can really return using any register if we like.
1783
1784 a.add(A::x2, A::x2, 4);
1785 a.add(A::x3, A::x2, 32);
1786
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001787 a.sub(A::x2, A::x2, 4);
1788 a.sub(A::x3, A::x2, 32);
1789
Mike Klein15a368d2019-06-26 10:21:12 -04001790 a.subs(A::x2, A::x2, 4);
1791 a.subs(A::x3, A::x2, 32);
1792
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001793 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1794 a.cmp(A::x2, 4);
1795
Mike Kleinc74db792020-05-11 11:57:12 -05001796 A::Label l;
1797 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001798 a.bne(&l);
1799 a.bne(&l);
1800 a.blt(&l);
1801 a.b(&l);
1802 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001803 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001804 },{
Mike Klein37be7712019-11-13 13:19:01 -06001805 0x00,0x00,0x20,0xd4,
1806 0xe0,0xff,0x3f,0xd4,
1807
Mike Klein15a368d2019-06-26 10:21:12 -04001808 0xc0,0x03,0x5f,0xd6,
1809 0xa0,0x01,0x5f,0xd6,
1810
1811 0x42,0x10,0x00,0x91,
1812 0x43,0x80,0x00,0x91,
1813
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001814 0x42,0x10,0x00,0xd1,
1815 0x43,0x80,0x00,0xd1,
1816
Mike Klein15a368d2019-06-26 10:21:12 -04001817 0x42,0x10,0x00,0xf1,
1818 0x43,0x80,0x00,0xf1,
1819
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001820 0x5f,0x10,0x00,0xf1,
1821 0x5f,0x10,0x00,0xf1,
1822
1823 0x01,0x00,0x00,0x54, // b.ne #0
1824 0xe1,0xff,0xff,0x54, // b.ne #-4
1825 0xcb,0xff,0xff,0x54, // b.lt #-8
1826 0xae,0xff,0xff,0x54, // b.al #-12
1827 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1828 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001829 });
Mike Kleine51632e2019-06-26 14:47:43 -04001830
Mike Kleince7b88c2019-07-11 14:06:40 -05001831 // Can we cbz() to a not-yet-defined label?
1832 test_asm(r, [&](A& a) {
1833 A::Label l;
1834 a.cbz(A::x2, &l);
1835 a.add(A::x3, A::x2, 32);
1836 a.label(&l);
1837 a.ret(A::x30);
1838 },{
1839 0x42,0x00,0x00,0xb4, // cbz x2, #8
1840 0x43,0x80,0x00,0x91, // add x3, x2, #32
1841 0xc0,0x03,0x5f,0xd6, // ret
1842 });
1843
1844 // If we start a label as a backward label,
1845 // can we redefine it to be a future label?
1846 // (Not sure this is useful... just want to test it works.)
1847 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001848 A::Label l1;
1849 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001850 a.add(A::x3, A::x2, 32);
1851 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1852
Mike Kleinc74db792020-05-11 11:57:12 -05001853 A::Label l2; // Start off the same...
1854 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001855 a.add(A::x3, A::x2, 32);
1856 a.cbz(A::x2, &l2); // Looks like this will go backward...
1857 a.add(A::x2, A::x2, 4);
1858 a.add(A::x3, A::x2, 32);
1859 a.label(&l2); // But no... actually forward! What a switcheroo!
1860 },{
1861 0x43,0x80,0x00,0x91, // add x3, x2, #32
1862 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1863
1864 0x43,0x80,0x00,0x91, // add x3, x2, #32
1865 0x62,0x00,0x00,0xb4, // cbz x2, #12
1866 0x42,0x10,0x00,0x91, // add x2, x2, #4
1867 0x43,0x80,0x00,0x91, // add x3, x2, #32
1868 });
1869
Mike Klein81d52672019-07-30 11:11:09 -05001870 // Loading from a label on ARM.
1871 test_asm(r, [&](A& a) {
1872 A::Label fore,aft;
1873 a.label(&fore);
1874 a.word(0x01234567);
1875 a.ldrq(A::v1, &fore);
1876 a.ldrq(A::v2, &aft);
1877 a.label(&aft);
1878 a.word(0x76543210);
1879 },{
1880 0x67,0x45,0x23,0x01,
1881 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1882 0x22,0x00,0x00,0x9c, // ldr q2, #4
1883 0x10,0x32,0x54,0x76,
1884 });
1885
Mike Kleine51632e2019-06-26 14:47:43 -04001886 test_asm(r, [&](A& a) {
1887 a.ldrq(A::v0, A::x8);
1888 a.strq(A::v0, A::x8);
1889 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001890 0x00,0x01,0xc0,0x3d,
1891 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001892 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001893
1894 test_asm(r, [&](A& a) {
1895 a.xtns2h(A::v0, A::v0);
1896 a.xtnh2b(A::v0, A::v0);
1897 a.strs (A::v0, A::x0);
1898
1899 a.ldrs (A::v0, A::x0);
1900 a.uxtlb2h(A::v0, A::v0);
1901 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001902
1903 a.uminv4s(A::v3, A::v4);
1904 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001905 },{
1906 0x00,0x28,0x61,0x0e,
1907 0x00,0x28,0x21,0x0e,
1908 0x00,0x00,0x00,0xbd,
1909
1910 0x00,0x00,0x40,0xbd,
1911 0x00,0xa4,0x08,0x2f,
1912 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001913
1914 0x83,0xa8,0xb1,0x6e,
1915 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001916 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001917
1918 test_asm(r, [&](A& a) {
1919 a.ldrb(A::v0, A::x8);
1920 a.strb(A::v0, A::x8);
1921 },{
1922 0x00,0x01,0x40,0x3d,
1923 0x00,0x01,0x00,0x3d,
1924 });
Mike Klein81d52672019-07-30 11:11:09 -05001925
1926 test_asm(r, [&](A& a) {
1927 a.tbl(A::v0, A::v1, A::v2);
1928 },{
1929 0x20,0x00,0x02,0x4e,
1930 });
Mike Klein05642042019-06-18 12:16:06 -05001931}
Mike Reedbcb46c02020-03-23 17:51:01 -04001932
1933DEF_TEST(SkVM_approx_math, r) {
1934 auto eval = [](int N, float values[], auto fn) {
1935 skvm::Builder b;
1936 skvm::Arg inout = b.varying<float>();
1937
1938 b.storeF(inout, fn(&b, b.loadF(inout)));
1939
1940 b.done().eval(N, values);
1941 };
1942
1943 auto compare = [r](int N, const float values[], const float expected[]) {
1944 for (int i = 0; i < N; ++i) {
1945 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1946 }
1947 };
1948
1949 // log2
1950 {
1951 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1952 constexpr int N = SK_ARRAY_COUNT(values);
1953 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1954 return b->approx_log2(v);
1955 });
1956 const float expected[] = {-2, -1, 0, 1, 2, 3};
1957 compare(N, values, expected);
1958 }
1959
1960 // pow2
1961 {
1962 float values[] = {-2, -1, 0, 1, 2, 3};
1963 constexpr int N = SK_ARRAY_COUNT(values);
1964 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1965 return b->approx_pow2(v);
1966 });
1967 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
1968 compare(N, values, expected);
1969 }
1970
1971 // powf -- x^0.5
1972 {
1973 float bases[] = {0, 1, 4, 9, 16};
1974 constexpr int N = SK_ARRAY_COUNT(bases);
1975 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
1976 return b->approx_powf(base, b->splat(0.5f));
1977 });
1978 const float expected[] = {0, 1, 2, 3, 4};
1979 compare(N, bases, expected);
1980 }
1981 // powf -- 3^x
1982 {
1983 float exps[] = {-2, -1, 0, 1, 2};
1984 constexpr int N = SK_ARRAY_COUNT(exps);
1985 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
1986 return b->approx_powf(b->splat(3.0f), exp);
1987 });
1988 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
1989 compare(N, exps, expected);
1990 }
Mike Reed82ff25e2020-04-07 13:51:41 -04001991
Mike Reedd468a162020-04-11 14:14:00 -04001992 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04001993 skvm::Builder b;
1994 skvm::Arg inout = b.varying<float>();
1995 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04001996 float actual = arg;
1997 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04001998
Mike Reedd468a162020-04-11 14:14:00 -04001999 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04002000
2001 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04002002 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04002003 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04002004 }
Mike Reed1b84ef22020-04-13 17:56:24 -04002005 return err;
2006 };
2007
2008 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2009 skvm::Builder b;
2010 skvm::Arg in0 = b.varying<float>();
2011 skvm::Arg in1 = b.varying<float>();
2012 skvm::Arg out = b.varying<float>();
2013 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2014 float actual;
2015 b.done().eval(1, &arg0, &arg1, &actual);
2016
2017 float err = std::abs(actual - expected);
2018
2019 if (err > tolerance) {
2020 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2021 REPORTER_ASSERT(r, true);
2022 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002023 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002024 };
2025
Mike Reed801ba0d2020-04-10 12:37:36 -04002026 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002027 {
2028 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002029 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002030 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2031 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2032 return approx_sin(x);
2033 });
2034 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2035 return approx_cos(x);
2036 });
2037 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002038
2039 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2040 // so bring in the domain a little.
2041 constexpr float eps = 0.16f;
2042 float err = 0;
2043 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2044 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2045 return approx_tan(x);
2046 });
2047 // try again with some multiples of P, to check our periodicity
2048 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2049 return approx_tan(x + 3*P);
2050 });
2051 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2052 return approx_tan(x - 3*P);
2053 });
2054 }
Mike Reedd468a162020-04-11 14:14:00 -04002055 if (0) { SkDebugf("tan error %g\n", err); }
2056 }
2057
2058 // asin, acos, atan
2059 {
2060 constexpr float tol = 0.00175f;
2061 float err = 0;
2062 for (float x = -1; x <= 1; x += 1.0f/64) {
2063 err += test(x, asin(x), tol, [](skvm::F32 x) {
2064 return approx_asin(x);
2065 });
2066 test(x, acos(x), tol, [](skvm::F32 x) {
2067 return approx_acos(x);
2068 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002069 }
Mike Reedd468a162020-04-11 14:14:00 -04002070 if (0) { SkDebugf("asin error %g\n", err); }
2071
2072 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002073 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002074 err += test(x, atan(x), tol, [](skvm::F32 x) {
2075 return approx_atan(x);
2076 });
2077 }
2078 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002079
2080 for (float y = -3; y <= 3; y += 1) {
2081 for (float x = -3; x <= 3; x += 1) {
2082 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002083 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002084 });
2085 }
2086 }
2087 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002088 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002089}
Mike Klein210288f2020-04-08 11:31:07 -05002090
2091DEF_TEST(SkVM_min_max, r) {
2092 // min() and max() have subtle behavior when one argument is NaN and
2093 // the other isn't. It's not sound to blindly swap their arguments.
2094 //
2095 // All backends must behave like std::min() and std::max(), which are
2096 //
2097 // min(x,y) = y<x ? y : x
2098 // max(x,y) = x<y ? y : x
2099
2100 // ±NaN, ±0, ±1, ±inf
2101 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2102 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2103
2104 float f[8];
2105 memcpy(f, bits, sizeof(bits));
2106
2107 auto identical = [&](float x, float y) {
2108 uint32_t X,Y;
2109 memcpy(&X, &x, 4);
2110 memcpy(&Y, &y, 4);
2111 return X == Y;
2112 };
2113
2114 // Test min/max with non-constant x, non-constant y.
2115 // (Whether x and y are varying or uniform shouldn't make any difference.)
2116 {
2117 skvm::Builder b;
2118 {
2119 skvm::Arg src = b.varying<float>(),
2120 mn = b.varying<float>(),
2121 mx = b.varying<float>();
2122
2123 skvm::F32 x = b.loadF(src),
2124 y = b.uniformF(b.uniform(), 0);
2125
2126 b.storeF(mn, b.min(x,y));
2127 b.storeF(mx, b.max(x,y));
2128 }
2129
Mike Klein10fc1e62020-04-13 11:57:05 -05002130 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002131 float mn[8], mx[8];
2132 for (int i = 0; i < 8; i++) {
2133 // min() and max() everything with f[i].
2134 program.eval(8, f,mn,mx, &f[i]);
2135
2136 for (int j = 0; j < 8; j++) {
2137 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2138 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2139 }
2140 }
2141 });
2142 }
2143
2144 // Test each with constant on the right.
2145 for (int i = 0; i < 8; i++) {
2146 skvm::Builder b;
2147 {
2148 skvm::Arg src = b.varying<float>(),
2149 mn = b.varying<float>(),
2150 mx = b.varying<float>();
2151
2152 skvm::F32 x = b.loadF(src),
2153 y = b.splat(f[i]);
2154
2155 b.storeF(mn, b.min(x,y));
2156 b.storeF(mx, b.max(x,y));
2157 }
2158
Mike Klein10fc1e62020-04-13 11:57:05 -05002159 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002160 float mn[8], mx[8];
2161 program.eval(8, f,mn,mx);
2162 for (int j = 0; j < 8; j++) {
2163 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2164 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2165 }
2166 });
2167 }
2168
2169 // Test each with constant on the left.
2170 for (int i = 0; i < 8; i++) {
2171 skvm::Builder b;
2172 {
2173 skvm::Arg src = b.varying<float>(),
2174 mn = b.varying<float>(),
2175 mx = b.varying<float>();
2176
2177 skvm::F32 x = b.splat(f[i]),
2178 y = b.loadF(src);
2179
2180 b.storeF(mn, b.min(x,y));
2181 b.storeF(mx, b.max(x,y));
2182 }
2183
Mike Klein10fc1e62020-04-13 11:57:05 -05002184 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002185 float mn[8], mx[8];
2186 program.eval(8, f,mn,mx);
2187 for (int j = 0; j < 8; j++) {
2188 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2189 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2190 }
2191 });
2192 }
2193}
Mike Klein4d680cd2020-07-15 09:58:51 -05002194
2195DEF_TEST(SkVM_halfs, r) {
2196 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2197 0xc400,0xb800,0xbc00,0xc000};
2198 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2199 -4.0f,-0.5f,-1.0f,-2.0f};
2200 {
2201 skvm::Builder b;
2202 skvm::Arg src = b.varying<uint16_t>(),
2203 dst = b.varying<float>();
2204 b.storeF(dst, b.from_half(b.load16(src)));
2205
2206 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2207 float dst[8];
2208 program.eval(8, hs, dst);
2209 for (int i = 0; i < 8; i++) {
2210 REPORTER_ASSERT(r, dst[i] == fs[i]);
2211 }
2212 });
2213 }
2214 {
2215 skvm::Builder b;
2216 skvm::Arg src = b.varying<float>(),
2217 dst = b.varying<uint16_t>();
2218 b.store16(dst, b.to_half(b.loadF(src)));
2219
2220 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2221 uint16_t dst[8];
2222 program.eval(8, fs, dst);
2223 for (int i = 0; i < 8; i++) {
2224 REPORTER_ASSERT(r, dst[i] == hs[i]);
2225 }
2226 });
2227 }
2228}
Mike Klein6732da02020-07-16 13:03:18 -05002229
2230DEF_TEST(SkVM_64bit, r) {
2231 uint32_t lo[65],
2232 hi[65];
2233 uint64_t wide[65];
2234 for (int i = 0; i < 65; i++) {
2235 lo[i] = 2*i+0;
2236 hi[i] = 2*i+1;
2237 wide[i] = ((uint64_t)lo[i] << 0)
2238 | ((uint64_t)hi[i] << 32);
2239 }
2240
2241 {
2242 skvm::Builder b;
2243 {
2244 skvm::Arg wide = b.varying<uint64_t>(),
2245 lo = b.varying<int>(),
2246 hi = b.varying<int>();
2247 b.store32(lo, b.load64_lo(wide));
2248 b.store32(hi, b.load64_hi(wide));
2249 }
2250 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2251 uint32_t l[65], h[65];
2252 program.eval(65, wide,l,h);
2253 for (int i = 0; i < 65; i++) {
2254 REPORTER_ASSERT(r, l[i] == lo[i]);
2255 REPORTER_ASSERT(r, h[i] == hi[i]);
2256 }
2257 });
2258 }
2259
2260 {
2261 skvm::Builder b;
2262 {
2263 skvm::Arg wide = b.varying<uint64_t>(),
2264 lo = b.varying<int>(),
2265 hi = b.varying<int>();
2266 b.store64(wide, b.load32(lo), b.load32(hi));
2267 }
2268 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2269 uint64_t w[65];
2270 program.eval(65, w,lo,hi);
2271 for (int i = 0; i < 65; i++) {
2272 REPORTER_ASSERT(r, w[i] == wide[i]);
2273 }
2274 });
2275 }
2276}
Mike Kleine942b8c2020-07-21 10:17:14 -05002277
2278DEF_TEST(SkVM_is_NaN_is_finite, r) {
2279 skvm::Builder b;
2280 {
2281 skvm::Arg src = b.varying<float>(),
2282 nan = b.varying<int>(),
2283 fin = b.varying<int>();
2284 b.store32(nan, is_NaN (b.loadF(src)));
2285 b.store32(fin, is_finite(b.loadF(src)));
2286 }
2287 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2288 // ±NaN, ±0, ±1, ±inf
2289 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2290 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2291 uint32_t nan[8], fin[8];
2292 program.eval(8, bits, nan,fin);
2293
2294 for (int i = 0; i < 8; i++) {
2295 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2296 REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2297 i == 4 || i == 5) ? 0xffffffff : 0));
2298 }
2299 });
2300}