blob: de42a01a44e5b5c761d276ea99ea31e3df054f45 [file] [log] [blame]
Mike Klein68c50d02019-05-29 12:57:54 -05001/*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Klein9977efa2019-07-15 12:22:36 -050035template <typename Fn>
Mike Klein10fc1e62020-04-13 11:57:05 -050036static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
Mike Klein10fc1e62020-04-13 11:57:05 -050037 if (program.hasJIT()) {
Mike Kleinb5a30762019-10-16 10:11:56 -050038 test((const skvm::Program&) program);
39 program.dropJIT();
40 }
Mike Klein10fc1e62020-04-13 11:57:05 -050041 test((const skvm::Program&) program);
Mike Kleinb5a30762019-10-16 10:11:56 -050042}
43
44
Mike Klein68c50d02019-05-29 12:57:54 -050045DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050046 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050047
48 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050049 for (int s = 0; s < 3; s++)
50 for (int d = 0; d < 3; d++) {
51 auto srcFmt = (Fmt)s,
52 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050053 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050054
Mike Klein267f5072019-06-03 16:27:46 -050055 buf.writeText(fmt_name(srcFmt));
56 buf.writeText(" over ");
57 buf.writeText(fmt_name(dstFmt));
58 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050059 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050060 }
Mike Klein68c50d02019-05-29 12:57:54 -050061
Mike Klein7b7077c2019-06-03 17:10:59 -050062 // Write the I32 Srcovers also.
63 {
Mike Kleinaab45b52019-07-02 15:39:23 -050064 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050065 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050066 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050067 }
Mike Klein7b7077c2019-06-03 17:10:59 -050068
Mike Kleinf9963112019-08-08 15:13:25 -040069 {
Mike Kleind48488b2019-10-22 12:27:58 -050070 // Demonstrate the value of program reordering.
71 skvm::Builder b;
72 skvm::Arg sp = b.varying<int>(),
73 dp = b.varying<int>();
74
75 skvm::I32 byte = b.splat(0xff);
76
77 skvm::I32 src = b.load32(sp),
78 sr = b.extract(src, 0, byte),
79 sg = b.extract(src, 8, byte),
80 sb = b.extract(src, 16, byte),
81 sa = b.extract(src, 24, byte);
82
83 skvm::I32 dst = b.load32(dp),
84 dr = b.extract(dst, 0, byte),
85 dg = b.extract(dst, 8, byte),
86 db = b.extract(dst, 16, byte),
87 da = b.extract(dst, 24, byte);
88
89 skvm::I32 R = b.add(sr, dr),
90 G = b.add(sg, dg),
91 B = b.add(sb, db),
92 A = b.add(sa, da);
93
94 skvm::I32 rg = b.pack(R, G, 8),
95 ba = b.pack(B, A, 8),
96 rgba = b.pack(rg, ba, 16);
97
98 b.store32(dp, rgba);
99
100 dump(b, &buf);
101 }
102
Mike Klein238105b2020-03-04 17:05:32 -0600103 // Our checked in dump expectations assume we have FMA support.
Mike Klein10fc1e62020-04-13 11:57:05 -0500104 if (skvm::fma_supported()) {
Ben Wagnere8ffb082020-05-04 10:50:08 -0400105 sk_sp<SkData> actual = buf.detachAsData();
106 bool writeActualAsNewExpectation = false;
Mike Klein238105b2020-03-04 17:05:32 -0600107 {
Mike Klein238105b2020-03-04 17:05:32 -0600108 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
Ben Wagnere8ffb082020-05-04 10:50:08 -0400109 if (!expected) {
110 ERRORF(r, "Couldn't load SkVMTest.expected.");
111 writeActualAsNewExpectation = true;
Mike Klein267f5072019-06-03 16:27:46 -0500112
Ben Wagnere8ffb082020-05-04 10:50:08 -0400113 } else if (!expected->equals(actual.get())) {
114 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
Adlai Holler684838f2020-05-12 10:41:04 -0400115 (int)expected->size(), expected->data(),
116 (int)actual->size(), actual->data());
Ben Wagnere8ffb082020-05-04 10:50:08 -0400117 writeActualAsNewExpectation = true;
118 }
119 }
120 if (writeActualAsNewExpectation) {
121 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
122 if (out.isValid()) {
123 out.write(actual->data(), actual->size());
Mike Klein77163312019-06-04 13:35:32 -0500124 }
Mike Klein68c50d02019-05-29 12:57:54 -0500125 }
126 }
127
Mike Klein9977efa2019-07-15 12:22:36 -0500128 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500129 uint32_t src[9];
130 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500131
Mike Klein10fc1e62020-04-13 11:57:05 -0500132 test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500133 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
134 src[i] = 0xbb007733;
135 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500136 }
Mike Klein9977efa2019-07-15 12:22:36 -0500137
138 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
139
140 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
141
142 // dst is probably 0xff2dad72.
143 for (auto got : dst) {
144 auto want = expected;
145 for (int i = 0; i < 4; i++) {
146 uint8_t d = got & 0xff,
147 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500148 if (abs(d-w) >= 2) {
149 SkDebugf("d %02x, w %02x\n", d,w);
150 }
Mike Klein9977efa2019-07-15 12:22:36 -0500151 REPORTER_ASSERT(r, abs(d-w) < 2);
152 got >>= 8;
153 want >>= 8;
154 }
155 }
156 });
Mike Klein3f593792019-06-12 12:54:52 -0500157 };
Mike Klein68c50d02019-05-29 12:57:54 -0500158
Mike Klein37607d42019-07-18 10:17:28 -0500159 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
160 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500161
Mike Klein10fc1e62020-04-13 11:57:05 -0500162 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500163 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500164 uint32_t src[9];
165 uint8_t dst[SK_ARRAY_COUNT(src)];
166
167 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
168 src[i] = 0xbb007733;
169 dst[i] = 0x42;
170 }
171
172 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
173 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500174
175 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
176 SkGetPackedG32(over),
177 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500178 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500179
Mike Klein3f593792019-06-12 12:54:52 -0500180 for (auto got : dst) {
181 REPORTER_ASSERT(r, abs(got-want) < 3);
182 }
Mike Klein9977efa2019-07-15 12:22:36 -0500183 });
Mike Klein68c50d02019-05-29 12:57:54 -0500184
Mike Klein10fc1e62020-04-13 11:57:05 -0500185 test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500186 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500187 uint8_t src[256],
188 dst[256];
189 for (int i = 0; i < 256; i++) {
190 src[i] = 255 - i;
191 dst[i] = i;
192 }
193
194 program.eval(256, src, dst);
195
196 for (int i = 0; i < 256; i++) {
197 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
198 SkPackARGB32( i, 0,0,0)));
199 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
200 }
Mike Klein9977efa2019-07-15 12:22:36 -0500201 });
Mike Klein68c50d02019-05-29 12:57:54 -0500202}
Mike Klein81756e42019-06-12 11:36:28 -0500203
Mike Klein7542ab52020-04-02 08:50:16 -0500204DEF_TEST(SkVM_eliminate_dead_code, r) {
205 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400206 {
Mike Klein7542ab52020-04-02 08:50:16 -0500207 skvm::Arg arg = b.varying<int>();
208 skvm::I32 l = b.load32(arg);
209 skvm::I32 a = b.add(l, l);
210 b.add(a, b.splat(7));
211 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400212
Mike Klein7542ab52020-04-02 08:50:16 -0500213 std::vector<skvm::Instruction> program = b.program();
214 REPORTER_ASSERT(r, program.size() == 4);
215
Mike Klein5b701e12020-04-02 10:34:24 -0500216 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500217 REPORTER_ASSERT(r, program.size() == 0);
218}
219
220DEF_TEST(SkVM_Usage, r) {
221 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400222 {
Mike Klein7542ab52020-04-02 08:50:16 -0500223 skvm::Arg arg = b.varying<int>(),
224 buf = b.varying<int>();
225 skvm::I32 l = b.load32(arg);
226 skvm::I32 a = b.add(l, l);
227 skvm::I32 s = b.add(a, b.splat(7));
228 b.store32(buf, s);
Herb Derbyf20400e2020-03-18 16:11:25 -0400229 }
Mike Klein7542ab52020-04-02 08:50:16 -0500230
Mike Kleinb7d87902020-04-02 10:14:35 -0500231 skvm::Usage usage{b.program()};
Mike Klein7542ab52020-04-02 08:50:16 -0500232 REPORTER_ASSERT(r, b.program()[0].op == skvm::Op::load32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500233 REPORTER_ASSERT(r, usage[0].size() == 2);
Mike Klein7542ab52020-04-02 08:50:16 -0500234 REPORTER_ASSERT(r, b.program()[1].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500235 REPORTER_ASSERT(r, usage[1].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500236 REPORTER_ASSERT(r, b.program()[2].op == skvm::Op::splat);
Mike Kleinb7d87902020-04-02 10:14:35 -0500237 REPORTER_ASSERT(r, usage[2].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500238 REPORTER_ASSERT(r, b.program()[3].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500239 REPORTER_ASSERT(r, usage[3].size() == 1);
Herb Derbyf20400e2020-03-18 16:11:25 -0400240}
241
Mike Klein9fdadb92019-07-30 12:30:13 -0500242DEF_TEST(SkVM_Pointless, r) {
243 // Let's build a program with no memory arguments.
244 // It should all be pegged as dead code, but we should be able to "run" it.
245 skvm::Builder b;
246 {
247 b.add(b.splat(5.0f),
248 b.splat(4.0f));
249 }
250
Mike Klein10fc1e62020-04-13 11:57:05 -0500251 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500252 for (int N = 0; N < 64; N++) {
253 program.eval(N);
254 }
255 });
256
Mike Kleined9b1f12020-02-06 13:02:32 -0600257 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500258 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500259 }
260}
261
Mike Klein10fc1e62020-04-13 11:57:05 -0500262DEF_TEST(SkVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600263 skvm::Builder b;
264 b.store32(b.varying<int>(), b.splat(42));
265
Mike Klein10fc1e62020-04-13 11:57:05 -0500266 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
267 int buf[18];
268 buf[17] = 47;
Mike Kleinb6149312020-02-26 13:04:23 -0600269
Mike Klein10fc1e62020-04-13 11:57:05 -0500270 p.eval(17, buf);
271 for (int i = 0; i < 17; i++) {
272 REPORTER_ASSERT(r, buf[i] == 42);
273 }
274 REPORTER_ASSERT(r, buf[17] == 47);
275 });
Mike Kleinb6149312020-02-26 13:04:23 -0600276}
Mike Klein11efa182020-02-27 12:04:37 -0600277
Mike Klein10fc1e62020-04-13 11:57:05 -0500278DEF_TEST(SkVM_memcpy, r) {
Mike Klein11efa182020-02-27 12:04:37 -0600279 skvm::Builder b;
280 {
281 auto src = b.varying<int>(),
282 dst = b.varying<int>();
283 b.store32(dst, b.load32(src));
284 }
285
Mike Klein10fc1e62020-04-13 11:57:05 -0500286 test_jit_and_interpreter(b.done(), [&](const skvm::Program& p) {
287 int src[] = {1,2,3,4,5,6,7,8,9},
288 dst[] = {0,0,0,0,0,0,0,0,0};
Mike Klein11efa182020-02-27 12:04:37 -0600289
Mike Klein10fc1e62020-04-13 11:57:05 -0500290 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
291 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
292 REPORTER_ASSERT(r, dst[i] == src[i]);
293 }
294 size_t i = SK_ARRAY_COUNT(src)-1;
295 REPORTER_ASSERT(r, dst[i] == 0);
296 });
Mike Klein11efa182020-02-27 12:04:37 -0600297}
Mike Kleinb6149312020-02-26 13:04:23 -0600298
Mike Klein81756e42019-06-12 11:36:28 -0500299DEF_TEST(SkVM_LoopCounts, r) {
300 // Make sure we cover all the exact N we want.
301
Mike Klein9977efa2019-07-15 12:22:36 -0500302 // buf[i] += 1
303 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500304 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500305 b.store32(arg,
306 b.add(b.splat(1),
307 b.load32(arg)));
308
Mike Klein10fc1e62020-04-13 11:57:05 -0500309 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500310 int buf[64];
311 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500312 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
313 buf[i] = i;
314 }
315 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500316
Mike Klein9977efa2019-07-15 12:22:36 -0500317 for (int i = 0; i < N; i++) {
318 REPORTER_ASSERT(r, buf[i] == i+1);
319 }
320 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
321 REPORTER_ASSERT(r, buf[i] == i);
322 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500323 }
324 });
Mike Klein81756e42019-06-12 11:36:28 -0500325}
Mike Klein05642042019-06-18 12:16:06 -0500326
Mike Kleinb2b6a992020-01-13 16:34:30 -0600327DEF_TEST(SkVM_gather32, r) {
328 skvm::Builder b;
329 {
330 skvm::Arg uniforms = b.uniform(),
331 buf = b.varying<int>();
332 skvm::I32 x = b.load32(buf);
333 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
334 }
335
Mike Klein10fc1e62020-04-13 11:57:05 -0500336 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb2b6a992020-01-13 16:34:30 -0600337 const int img[] = {12,34,56,78, 90,98,76,54};
338
339 int buf[20];
340 for (int i = 0; i < 20; i++) {
341 buf[i] = i;
342 }
343
344 struct Uniforms {
345 const int* img;
346 } uniforms{img};
347
348 program.eval(20, &uniforms, buf);
349 int i = 0;
350 REPORTER_ASSERT(r, buf[i] == 12); i++;
351 REPORTER_ASSERT(r, buf[i] == 34); i++;
352 REPORTER_ASSERT(r, buf[i] == 56); i++;
353 REPORTER_ASSERT(r, buf[i] == 78); i++;
354 REPORTER_ASSERT(r, buf[i] == 90); i++;
355 REPORTER_ASSERT(r, buf[i] == 98); i++;
356 REPORTER_ASSERT(r, buf[i] == 76); i++;
357 REPORTER_ASSERT(r, buf[i] == 54); i++;
358
359 REPORTER_ASSERT(r, buf[i] == 12); i++;
360 REPORTER_ASSERT(r, buf[i] == 34); i++;
361 REPORTER_ASSERT(r, buf[i] == 56); i++;
362 REPORTER_ASSERT(r, buf[i] == 78); i++;
363 REPORTER_ASSERT(r, buf[i] == 90); i++;
364 REPORTER_ASSERT(r, buf[i] == 98); i++;
365 REPORTER_ASSERT(r, buf[i] == 76); i++;
366 REPORTER_ASSERT(r, buf[i] == 54); i++;
367
368 REPORTER_ASSERT(r, buf[i] == 12); i++;
369 REPORTER_ASSERT(r, buf[i] == 34); i++;
370 REPORTER_ASSERT(r, buf[i] == 56); i++;
371 REPORTER_ASSERT(r, buf[i] == 78); i++;
372 });
373}
374
Mike Klein81d52672019-07-30 11:11:09 -0500375DEF_TEST(SkVM_gathers, r) {
376 skvm::Builder b;
377 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600378 skvm::Arg uniforms = b.uniform(),
379 buf32 = b.varying<int>(),
380 buf16 = b.varying<uint16_t>(),
381 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500382
383 skvm::I32 x = b.load32(buf32);
384
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600385 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
386 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
387 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500388 }
389
Mike Klein10fc1e62020-04-13 11:57:05 -0500390 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500391 const int img[] = {12,34,56,78, 90,98,76,54};
392
393 constexpr int N = 20;
394 int buf32[N];
395 uint16_t buf16[N];
396 uint8_t buf8 [N];
397
398 for (int i = 0; i < 20; i++) {
399 buf32[i] = i;
400 }
401
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600402 struct Uniforms {
403 const int* img;
404 } uniforms{img};
405
406 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500407 int i = 0;
408 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
409 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
410 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
411 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
412 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
413 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
414 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
415 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
416
417 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
418 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
419 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
420 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
421 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
422 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
423 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
424 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
425
426 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
427 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
428 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
429 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
430 });
431}
432
Mike Klein21e85eb2020-04-17 13:57:13 -0500433DEF_TEST(SkVM_gathers2, r) {
434 skvm::Builder b;
435 {
436 skvm::Arg uniforms = b.uniform(),
437 buf32 = b.varying<int>(),
438 buf16 = b.varying<uint16_t>(),
439 buf8 = b.varying<uint8_t>();
440
441 skvm::I32 x = b.load32(buf32);
442
443 b.store32(buf32, b.gather32(uniforms,0, x));
444 b.store16(buf16, b.gather16(uniforms,0, x));
445 b.store8 (buf8 , b.gather8 (uniforms,0, x));
446 }
447
448 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
449 uint8_t img[256];
450 for (int i = 0; i < 256; i++) {
451 img[i] = i;
452 }
453
454 int buf32[64];
455 uint16_t buf16[64];
456 uint8_t buf8 [64];
457
458 for (int i = 0; i < 64; i++) {
459 buf32[i] = (i*47)&63;
460 buf16[i] = 0;
461 buf8 [i] = 0;
462 }
463
464 struct Uniforms {
465 const uint8_t* img;
466 } uniforms{img};
467
468 program.eval(64, &uniforms, buf32, buf16, buf8);
469
470 for (int i = 0; i < 64; i++) {
471 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
472 }
473
474 REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
475 REPORTER_ASSERT(r, buf16[63] == 0x2322);
476
477 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
478 REPORTER_ASSERT(r, buf32[63] == 0x47464544);
479 });
480}
481
Mike Klein81d52672019-07-30 11:11:09 -0500482DEF_TEST(SkVM_bitops, r) {
483 skvm::Builder b;
484 {
485 skvm::Arg ptr = b.varying<int>();
486
487 skvm::I32 x = b.load32(ptr);
488
Mike Klein4067a942020-04-05 10:25:32 -0500489 x = b.bit_and (x, b.splat(0xf1)); // 0x40
490 x = b.bit_or (x, b.splat(0x80)); // 0xc0
491 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
492 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500493
494 x = b.shl(x, 28); // 0xe000'0000
495 x = b.sra(x, 28); // 0xffff'fffe
496 x = b.shr(x, 1); // 0x7fff'ffff
497
498 b.store32(ptr, x);
499 }
500
Mike Klein10fc1e62020-04-13 11:57:05 -0500501 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500502 int x = 0x42;
503 program.eval(1, &x);
504 REPORTER_ASSERT(r, x == 0x7fff'ffff);
505 });
506}
507
Mike Klein4067a942020-04-05 10:25:32 -0500508DEF_TEST(SkVM_select_is_NaN, r) {
509 skvm::Builder b;
510 {
511 skvm::Arg src = b.varying<float>(),
512 dst = b.varying<float>();
513
514 skvm::F32 x = b.loadF(src);
515 x = select(is_NaN(x), b.splat(0.0f)
516 , x);
517 b.storeF(dst, x);
518 }
519
520 std::vector<skvm::OptimizedInstruction> program = b.optimize();
521 REPORTER_ASSERT(r, program.size() == 4);
522 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
523 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
524 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
525 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
526
Mike Klein10fc1e62020-04-13 11:57:05 -0500527 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4067a942020-04-05 10:25:32 -0500528 // ±NaN, ±0, ±1, ±inf
529 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
530 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
531 uint32_t dst[SK_ARRAY_COUNT(src)];
532 program.eval(SK_ARRAY_COUNT(src), src, dst);
533
534 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
535 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
536 }
537 });
538}
539
Mike Klein81d52672019-07-30 11:11:09 -0500540DEF_TEST(SkVM_f32, r) {
541 skvm::Builder b;
542 {
543 skvm::Arg arg = b.varying<float>();
544
Mike Reedf5ff4c22020-03-23 14:57:53 -0400545 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500546 y = b.add(x,x), // y = 2x
547 z = b.sub(y,x), // z = 2x-x = x
548 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400549 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500550 }
551
Mike Klein10fc1e62020-04-13 11:57:05 -0500552 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500553 float buf[] = { 1,2,3,4,5,6,7,8,9 };
554 program.eval(SK_ARRAY_COUNT(buf), buf);
555 for (float v : buf) {
556 REPORTER_ASSERT(r, v == 1.0f);
557 }
558 });
559}
560
561DEF_TEST(SkVM_cmp_i32, r) {
562 skvm::Builder b;
563 {
564 skvm::I32 x = b.load32(b.varying<int>());
565
566 auto to_bit = [&](int shift, skvm::I32 mask) {
567 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
568 };
569
570 skvm::I32 m = b.splat(0);
571 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
572 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
573 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
574 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
575 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
576 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
577
578 b.store32(b.varying<int>(), m);
579 }
Mike Klein10fc1e62020-04-13 11:57:05 -0500580 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500581 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
582 int out[SK_ARRAY_COUNT(in)];
583
584 program.eval(SK_ARRAY_COUNT(in), in, out);
585
586 REPORTER_ASSERT(r, out[0] == 0b001111);
587 REPORTER_ASSERT(r, out[1] == 0b001100);
588 REPORTER_ASSERT(r, out[2] == 0b001010);
589 REPORTER_ASSERT(r, out[3] == 0b001010);
590 REPORTER_ASSERT(r, out[4] == 0b000010);
591 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
592 REPORTER_ASSERT(r, out[i] == 0b110010);
593 }
594 });
595}
596
597DEF_TEST(SkVM_cmp_f32, r) {
598 skvm::Builder b;
599 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400600 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500601
602 auto to_bit = [&](int shift, skvm::I32 mask) {
603 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
604 };
605
606 skvm::I32 m = b.splat(0);
607 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
608 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
609 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
610 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
611 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
612 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
613
614 b.store32(b.varying<int>(), m);
615 }
616
Mike Klein10fc1e62020-04-13 11:57:05 -0500617 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500618 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
619 int out[SK_ARRAY_COUNT(in)];
620
621 program.eval(SK_ARRAY_COUNT(in), in, out);
622
623 REPORTER_ASSERT(r, out[0] == 0b001111);
624 REPORTER_ASSERT(r, out[1] == 0b001100);
625 REPORTER_ASSERT(r, out[2] == 0b001010);
626 REPORTER_ASSERT(r, out[3] == 0b001010);
627 REPORTER_ASSERT(r, out[4] == 0b000010);
628 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
629 REPORTER_ASSERT(r, out[i] == 0b110010);
630 }
631 });
632}
633
Mike Klein14548b92020-02-28 14:02:29 -0600634DEF_TEST(SkVM_index, r) {
635 skvm::Builder b;
636 b.store32(b.varying<int>(), b.index());
637
Mike Klein10fc1e62020-04-13 11:57:05 -0500638 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein14548b92020-02-28 14:02:29 -0600639 int buf[23];
640 program.eval(SK_ARRAY_COUNT(buf), buf);
641 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
642 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
643 }
644 });
645}
646
Mike Klein4a131192019-07-19 13:56:41 -0500647DEF_TEST(SkVM_mad, r) {
648 // This program is designed to exercise the tricky corners of instruction
649 // and register selection for Op::mad_f32.
650
651 skvm::Builder b;
652 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500653 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500654
655 skvm::F32 x = b.to_f32(b.load32(arg)),
656 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
657 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
658 w = b.mad(z,z,y), // w can alias z but not y.
659 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600660 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500661 }
662
Mike Klein10fc1e62020-04-13 11:57:05 -0500663 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500664 int x = 2;
665 program.eval(1, &x);
666 // x = 2
667 // y = 2*2 + 2 = 6
668 // z = 6*6 + 2 = 38
669 // w = 38*38 + 6 = 1450
670 // v = 1450*6 + 1450 = 10150
671 REPORTER_ASSERT(r, x == 10150);
672 });
673}
674
Mike Klein7c0332c2020-03-05 14:18:04 -0600675DEF_TEST(SkVM_fms, r) {
676 // Create a pattern that can be peepholed into an Op::fms_f32.
677 skvm::Builder b;
678 {
679 skvm::Arg arg = b.varying<int>();
680
681 skvm::F32 x = b.to_f32(b.load32(arg)),
682 v = b.sub(b.mul(x, b.splat(2.0f)),
683 b.splat(1.0f));
684 b.store32(arg, b.trunc(v));
685 }
686
Mike Klein10fc1e62020-04-13 11:57:05 -0500687 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600688 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
689 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
690
691 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
692 REPORTER_ASSERT(r, buf[i] = 2*i-1);
693 }
694 });
695}
696
697DEF_TEST(SkVM_fnma, r) {
698 // Create a pattern that can be peepholed into an Op::fnma_f32.
699 skvm::Builder b;
700 {
701 skvm::Arg arg = b.varying<int>();
702
703 skvm::F32 x = b.to_f32(b.load32(arg)),
704 v = b.sub(b.splat(1.0f),
705 b.mul(x, b.splat(2.0f)));
706 b.store32(arg, b.trunc(v));
707 }
708
Mike Klein10fc1e62020-04-13 11:57:05 -0500709 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein7c0332c2020-03-05 14:18:04 -0600710 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
711 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
712
713 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
714 REPORTER_ASSERT(r, buf[i] = 1-2*i);
715 }
716 });
717}
718
Mike Klein81d52672019-07-30 11:11:09 -0500719DEF_TEST(SkVM_madder, r) {
720 skvm::Builder b;
721 {
722 skvm::Arg arg = b.varying<float>();
723
Mike Reedf5ff4c22020-03-23 14:57:53 -0400724 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500725 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
726 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
727 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400728 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500729 }
730
Mike Klein10fc1e62020-04-13 11:57:05 -0500731 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500732 float x = 2.0f;
733 // y = 2*2 + 2 = 6
734 // z = 6*2 + 6 = 18
735 // w = 6*6 + 18 = 54
736 program.eval(1, &x);
737 REPORTER_ASSERT(r, x == 54.0f);
738 });
739}
740
Mike Kleinf22faaf2020-01-09 07:27:39 -0600741DEF_TEST(SkVM_floor, r) {
742 skvm::Builder b;
743 {
744 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400745 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600746 }
747
Mike Klein10fc1e62020-04-13 11:57:05 -0500748 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf22faaf2020-01-09 07:27:39 -0600749 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
750 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
751 program.eval(SK_ARRAY_COUNT(buf), buf);
752 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
753 REPORTER_ASSERT(r, buf[i] == want[i]);
754 }
755 });
756}
757
Mike Klein5caf7de2020-03-12 11:05:46 -0500758DEF_TEST(SkVM_round, r) {
759 skvm::Builder b;
760 {
761 skvm::Arg src = b.varying<float>();
762 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400763 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500764 }
765
766 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
767 // We haven't explicitly guaranteed that here... it just probably is.
Mike Klein10fc1e62020-04-13 11:57:05 -0500768 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5caf7de2020-03-12 11:05:46 -0500769 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
770 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
771 int dst[SK_ARRAY_COUNT(buf)];
772
773 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
774 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
775 REPORTER_ASSERT(r, dst[i] == want[i]);
776 }
777 });
778}
779
Herb Derbyc02a41f2020-02-28 14:25:45 -0600780DEF_TEST(SkVM_min, r) {
781 skvm::Builder b;
782 {
783 skvm::Arg src1 = b.varying<float>();
784 skvm::Arg src2 = b.varying<float>();
785 skvm::Arg dst = b.varying<float>();
786
Mike Reedf5ff4c22020-03-23 14:57:53 -0400787 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600788 }
789
Mike Klein10fc1e62020-04-13 11:57:05 -0500790 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600791 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
792 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
793 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
794 float d[SK_ARRAY_COUNT(s1)];
795 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
796 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
797 REPORTER_ASSERT(r, d[i] == want[i]);
798 }
799 });
800}
801
802DEF_TEST(SkVM_max, r) {
803 skvm::Builder b;
804 {
805 skvm::Arg src1 = b.varying<float>();
806 skvm::Arg src2 = b.varying<float>();
807 skvm::Arg dst = b.varying<float>();
808
Mike Reedf5ff4c22020-03-23 14:57:53 -0400809 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600810 }
811
Mike Klein10fc1e62020-04-13 11:57:05 -0500812 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Herb Derbyc02a41f2020-02-28 14:25:45 -0600813 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
814 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
815 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
816 float d[SK_ARRAY_COUNT(s1)];
817 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
818 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
819 REPORTER_ASSERT(r, d[i] == want[i]);
820 }
821 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600822}
823
Mike Kleinf98d0d32019-07-22 14:30:18 -0500824DEF_TEST(SkVM_hoist, r) {
825 // This program uses enough constants that it will fail to JIT if we hoist them.
826 // The JIT will try again without hoisting, and that'll just need 2 registers.
827 skvm::Builder b;
828 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500829 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500830 skvm::I32 x = b.load32(arg);
831 for (int i = 0; i < 32; i++) {
832 x = b.add(x, b.splat(i));
833 }
834 b.store32(arg, x);
835 }
836
Mike Klein10fc1e62020-04-13 11:57:05 -0500837 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500838 int x = 4;
839 program.eval(1, &x);
840 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
841 // x += 496
842 REPORTER_ASSERT(r, x == 500);
843 });
844}
845
Mike Kleinb9944122019-08-02 12:22:39 -0500846DEF_TEST(SkVM_select, r) {
847 skvm::Builder b;
848 {
849 skvm::Arg buf = b.varying<int>();
850
851 skvm::I32 x = b.load32(buf);
852
853 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
854
855 b.store32(buf, x);
856 }
857
Mike Klein10fc1e62020-04-13 11:57:05 -0500858 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500859 int buf[] = { 0,1,2,3,4,5,6,7,8 };
860 program.eval(SK_ARRAY_COUNT(buf), buf);
861 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
862 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
863 }
864 });
865}
866
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500867DEF_TEST(SkVM_NewOps, r) {
868 // Exercise a somewhat arbitrary set of new ops.
869 skvm::Builder b;
870 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500871 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500872 uniforms = b.uniform();
873
874 skvm::I32 x = b.load16(buf);
875
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600876 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500877
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600878 x = b.add(x, b.uniform32(uniforms, kPtr+0));
879 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
880 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
881
882 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500883 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
884 x = b.select(b.gt(x, limit ), limit , x);
885
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600886 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500887
888 b.store16(buf, x);
889 }
890
891 if ((false)) {
892 SkDynamicMemoryWStream buf;
893 dump(b, &buf);
894 sk_sp<SkData> blob = buf.detachAsData();
895 SkDebugf("%.*s\n", blob->size(), blob->data());
896 }
897
Mike Klein10fc1e62020-04-13 11:57:05 -0500898 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500899 const int N = 31;
900 int16_t buf[N];
901 for (int i = 0; i < N; i++) {
902 buf[i] = i;
903 }
904
905 const int M = 16;
906 uint8_t img[M];
907 for (int i = 0; i < M; i++) {
908 img[i] = i*i;
909 }
910
911 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600912 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500913 int add = 5;
914 uint8_t mul = 3;
915 uint16_t sub = 18;
916 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600917 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500918
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600919 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500920
921 for (int i = 0; i < N; i++) {
922 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
923 int x = 3*(i-1);
924
925 // Then that's pinned to the limits of img.
926 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
927 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
928 REPORTER_ASSERT(r, buf[i] == img[x]);
929 }
930 });
931}
932
Mike Klein5a8404c2020-02-28 14:24:56 -0600933DEF_TEST(SkVM_sqrt, r) {
934 skvm::Builder b;
935 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400936 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -0600937
Mike Klein10fc1e62020-04-13 11:57:05 -0500938 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein5a8404c2020-02-28 14:24:56 -0600939 constexpr int K = 17;
940 float buf[K];
941 for (int i = 0; i < K; i++) {
942 buf[i] = (float)(i*i);
943 }
944
945 // x^2 -> x
946 program.eval(K, buf);
947
948 for (int i = 0; i < K; i++) {
949 REPORTER_ASSERT(r, buf[i] == (float)i);
950 }
951 });
952}
953
Mike Klein3f7c8652019-11-07 10:33:56 -0600954DEF_TEST(SkVM_MSAN, r) {
955 // This little memset32() program should be able to JIT, but if we run that
956 // JIT code in an MSAN build, it won't see the writes initialize buf. So
957 // this tests that we're using the interpreter instead.
958 skvm::Builder b;
959 b.store32(b.varying<int>(), b.splat(42));
960
Mike Klein10fc1e62020-04-13 11:57:05 -0500961 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein3f7c8652019-11-07 10:33:56 -0600962 constexpr int K = 17;
963 int buf[K]; // Intentionally uninitialized.
964 program.eval(K, buf);
965 sk_msan_assert_initialized(buf, buf+K);
966 for (int x : buf) {
967 REPORTER_ASSERT(r, x == 42);
968 }
969 });
970}
971
Mike Klein13601172019-11-08 15:01:02 -0600972DEF_TEST(SkVM_assert, r) {
973 skvm::Builder b;
974 b.assert_true(b.lt(b.load32(b.varying<int>()),
975 b.splat(42)));
976
Mike Klein10fc1e62020-04-13 11:57:05 -0500977 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -0600978 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -0600979 program.eval(SK_ARRAY_COUNT(buf), buf);
980 });
981}
982
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600983DEF_TEST(SkVM_premul, reporter) {
984 // Test that premul is short-circuited when alpha is known opaque.
985 {
986 skvm::Builder p;
987 auto rptr = p.varying<int>(),
988 aptr = p.varying<int>();
989
Mike Reedf5ff4c22020-03-23 14:57:53 -0400990 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600991 g = p.splat(0.0f),
992 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -0400993 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600994
995 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400996 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -0600997
998 // load red, load alpha, red *= alpha, store red
999 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
1000 }
1001
1002 {
1003 skvm::Builder p;
1004 auto rptr = p.varying<int>();
1005
Mike Reedf5ff4c22020-03-23 14:57:53 -04001006 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001007 g = p.splat(0.0f),
1008 b = p.splat(0.0f),
1009 a = p.splat(1.0f);
1010
1011 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001012 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001013
1014 // load red, store red
1015 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1016 }
1017
1018 // Same deal for unpremul.
1019 {
1020 skvm::Builder p;
1021 auto rptr = p.varying<int>(),
1022 aptr = p.varying<int>();
1023
Mike Reedf5ff4c22020-03-23 14:57:53 -04001024 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001025 g = p.splat(0.0f),
1026 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001027 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001028
1029 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001030 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001031
1032 // load red, load alpha, a bunch of unpremul instructions, store red
1033 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1034 }
1035
1036 {
1037 skvm::Builder p;
1038 auto rptr = p.varying<int>();
1039
Mike Reedf5ff4c22020-03-23 14:57:53 -04001040 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001041 g = p.splat(0.0f),
1042 b = p.splat(0.0f),
1043 a = p.splat(1.0f);
1044
1045 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001046 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001047
1048 // load red, store red
1049 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1050 }
1051}
Mike Klein05642042019-06-18 12:16:06 -05001052
Mike Klein05642042019-06-18 12:16:06 -05001053template <typename Fn>
1054static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001055 uint8_t buf[4096];
1056 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001057 fn(a);
1058
1059 REPORTER_ASSERT(r, a.size() == expected.size());
1060
Mike Klein88c0a902019-06-24 15:34:02 -04001061 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001062 want = expected.begin();
1063 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001064 REPORTER_ASSERT(r, got[i] == want[i],
1065 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001066 }
1067}
1068
1069DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001070 // Easiest way to generate test cases is
1071 //
1072 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1073 //
1074 // The -x86-asm-syntax=intel bit is optional, controlling the
1075 // input syntax only; the output will always be AT&T op x,y,dst style.
1076 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1077 // that a bit easier to use here, despite maybe favoring AT&T overall.
1078
1079 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001080 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001081 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001082 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001083 a.vzeroupper();
1084 a.ret();
1085 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001086 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001087 0xc5, 0xf8, 0x77,
1088 0xc3,
1089 });
1090
Mike Klein237dbb42019-07-19 09:44:47 -05001091 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001092 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001093 a.ret();
1094 a.align(4);
1095 },{
1096 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001097 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001098 });
Mike Klein61703a62019-06-18 15:01:12 -05001099
Mike Klein397fc882019-06-20 11:37:10 -05001100 test_asm(r, [&](A& a) {
1101 a.add(A::rax, 8); // Always good to test rax.
1102 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001103
Mike Klein397fc882019-06-20 11:37:10 -05001104 a.add(A::rdi, 12); // Last 0x48 REX
1105 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001106
Mike Klein86a645c2019-07-12 12:29:39 -05001107 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001108 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001109
Mike Klein397fc882019-06-20 11:37:10 -05001110 a.add(A::rsi, 128); // Requires 4 byte immediate.
1111 a.sub(A::r8 , 1000000);
Mike Kleinc15c9362020-04-16 11:10:36 -05001112
1113 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1114 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1115 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
1116 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
1117 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1118 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1119 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1120
1121 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1122
1123 a.add( A::rax , A::rcx); // addq %rcx, %rax
1124 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1125 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1126 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1127
1128 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
Mike Klein61703a62019-06-18 15:01:12 -05001129 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001130 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001131 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001132
1133 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001134 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001135
Mike Klein86a645c2019-07-12 12:29:39 -05001136 0x49, 0x83, 0b11'000'000, 0x07,
1137 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001138
1139 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001140 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Kleinc15c9362020-04-16 11:10:36 -05001141
1142 0x48,0x83,0x06,0x07,
1143 0x48,0x83,0x46,0x0c,0x07,
1144 0x48,0x83,0x44,0x24,0x0c,0x07,
1145 0x48,0x83,0x44,0x84,0x0c,0x07,
1146 0x4b,0x83,0x44,0x43,0x0c,0x07,
1147 0x49,0x83,0x44,0x03,0x0c,0x07,
1148 0x4a,0x83,0x44,0x18,0x0c,0x07,
1149
1150 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1151
1152 0x48,0x01,0xc8,
1153 0x48,0x01,0x08,
1154 0x48,0x01,0x48,0x0c,
1155 0x48,0x03,0x48,0x0c,
1156 0x48,0x2b,0x48,0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001157 });
Mike Klein397fc882019-06-20 11:37:10 -05001158
1159
1160 test_asm(r, [&](A& a) {
1161 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1162 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1163 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1164 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1165 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1166 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1167 },{
1168 /* VEX */ /*op*/ /*modRM*/
1169 0xc5, 0xf5, 0xfe, 0xc2,
1170 0xc5, 0x75, 0xfe, 0xc2,
1171 0xc5, 0xbd, 0xfe, 0xc2,
1172 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1173 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1174 0xc5, 0xf5, 0xfa, 0xc2,
1175 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001176
1177 test_asm(r, [&](A& a) {
Mike Klein48c51bb2020-04-23 18:07:49 -05001178 A::Label l;
1179 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
Mike Klein714f8cc2019-11-06 12:54:46 -06001180 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1181 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1182 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1183 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1184 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1185 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Klein48c51bb2020-04-23 18:07:49 -05001186 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
Mike Kleinb9944122019-08-02 12:22:39 -05001187 },{
Mike Klein48c51bb2020-04-23 18:07:49 -05001188 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
Mike Kleinb9944122019-08-02 12:22:39 -05001189 0xc5,0xf5,0x76,0xc2,
1190 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001191 0xc5,0xf4,0xc2,0xc2,0x00,
1192 0xc5,0xf4,0xc2,0xc2,0x01,
1193 0xc5,0xf4,0xc2,0xc2,0x02,
1194 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001195 });
1196
1197 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001198 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1199 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1200 },{
1201 0xc5,0xf4,0x5d,0xc2,
1202 0xc5,0xf4,0x5f,0xc2,
1203 });
1204
1205 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001206 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1207 },{
1208 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1209 });
1210
1211 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001212 a.vpsrld(A::ymm15, A::ymm2, 8);
1213 a.vpsrld(A::ymm0 , A::ymm8, 5);
1214 },{
1215 0xc5, 0x85, 0x72,0xd2, 0x08,
1216 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1217 });
1218
1219 test_asm(r, [&](A& a) {
1220 a.vpermq(A::ymm1, A::ymm2, 5);
1221 },{
1222 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1223 });
Mike Kleine5053412019-06-21 12:37:22 -05001224
1225 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001226 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1227 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1228 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1229 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1230 },{
1231 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1232 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1233 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1234 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1235 });
1236
1237 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001238 A::Label l;
1239 a.label(&l);
Mike Kleine5053412019-06-21 12:37:22 -05001240 a.byte(1);
1241 a.byte(2);
1242 a.byte(3);
1243 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001244
Mike Klein65c10b52019-07-12 09:22:21 -05001245 a.vbroadcastss(A::ymm0 , &l);
1246 a.vbroadcastss(A::ymm1 , &l);
1247 a.vbroadcastss(A::ymm8 , &l);
1248 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001249
Mike Klein65c10b52019-07-12 09:22:21 -05001250 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001251 a.vpaddd (A::ymm4, A::ymm3, &l);
1252 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001253
1254 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001255
1256 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001257 },{
1258 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001259
Mike Kleine5053412019-06-21 12:37:22 -05001260 /* VEX */ /*op*/ /* ModRM */ /* offset */
1261 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1262 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1263 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1264 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001265
1266 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001267
1268 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1269 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001270
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001271 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1272
1273 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001274 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001275
1276 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001277 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1278 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1279 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1280 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
Mike Klein94d054b2019-08-02 10:54:23 -05001281
1282 a.vbroadcastss(A::ymm8, A::xmm0);
1283 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001284 },{
1285 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1286 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1287 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1288 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1289 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001290
1291 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1292 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001293 });
1294
1295 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001296 A::Label l;
1297 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001298 a.jne(&l);
1299 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001300 a.je (&l);
1301 a.jmp(&l);
1302 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001303 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001304
Mike Kleinc15c9362020-04-16 11:10:36 -05001305 a.cmp(A::rdx, 1);
Mike Klein35b97c32019-07-12 12:32:45 -05001306 a.cmp(A::rax, 12);
1307 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001308 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001309 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1310 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1311 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1312 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1313 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001314 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001315
Mike Kleinc15c9362020-04-16 11:10:36 -05001316 0x48,0x83,0xfa,0x01,
Mike Klein35b97c32019-07-12 12:32:45 -05001317 0x48,0x83,0xf8,0x0c,
1318 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001319 });
Mike Klein120d9e82019-06-21 15:52:55 -05001320
1321 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001322 a.vmovups(A::ymm5, A::Mem{A::rsi});
1323 a.vmovups(A::Mem{A::rsi}, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001324
Mike Klein400ba222020-06-30 15:54:19 -05001325 a.vmovups(A::xmm5, A::Mem{A::rsi});
Mike Klein8390f2e2020-04-15 17:03:08 -05001326 a.vmovups(A::Mem{A::rsi}, A::xmm5);
Mike Klein95529e82019-08-02 11:43:43 -05001327
Mike Kleinedc2dac2020-04-15 16:18:27 -05001328 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1329 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
Mike Kleinf3881b22019-06-21 16:20:24 -05001330
Mike Klein8390f2e2020-04-15 17:03:08 -05001331 a.vmovq(A::Mem{A::rdx}, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001332 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001333 /* VEX */ /*Op*/ /* ModRM */
1334 0xc5, 0xfc, 0x10, 0b00'101'110,
1335 0xc5, 0xfc, 0x11, 0b00'101'110,
1336
Mike Klein400ba222020-06-30 15:54:19 -05001337 0xc5, 0xf8, 0x10, 0b00'101'110,
Mike Klein95529e82019-08-02 11:43:43 -05001338 0xc5, 0xf8, 0x11, 0b00'101'110,
1339
Mike Klein52010b72019-08-02 11:18:00 -05001340 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001341 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001342
1343 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001344 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001345
1346 test_asm(r, [&](A& a) {
Mike Kleinedc2dac2020-04-15 16:18:27 -05001347 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1348 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1349 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001350
Mike Kleinedc2dac2020-04-15 16:18:27 -05001351 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1352 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1353 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
Mike Klein5e9f0ee2020-04-07 15:10:15 -05001354 },{
1355 0xc5,0xfc,0x10,0x2c,0x24,
1356 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1357 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1358
1359 0xc5,0xfc,0x11,0x2c,0x24,
1360 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1361 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1362 });
1363
1364 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001365 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1366 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1367 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1368 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1369 a.movzbq(A::r8, A::Mem{A::rsi, 400});
Mike Klein35b97c32019-07-12 12:32:45 -05001370
Mike Kleinc15c9362020-04-16 11:10:36 -05001371 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1372 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1373 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1374 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1375 a.movzwq(A::r8, A::Mem{A::rsi, 400});
Mike Kleincb511042020-04-13 13:12:17 -05001376
Mike Klein8390f2e2020-04-15 17:03:08 -05001377 a.vmovd(A::Mem{A::rax}, A::xmm0);
1378 a.vmovd(A::Mem{A::rax}, A::xmm8);
1379 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1380
1381 a.vmovd(A::xmm0, A::Mem{A::rax});
1382 a.vmovd(A::xmm8, A::Mem{A::rax});
1383 a.vmovd(A::xmm0, A::Mem{A::r8 });
1384
1385 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1386 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1387 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1388
Mike Klein35b97c32019-07-12 12:32:45 -05001389 a.vmovd(A::rax, A::xmm0);
1390 a.vmovd(A::rax, A::xmm8);
Mike Klein8390f2e2020-04-15 17:03:08 -05001391 a.vmovd(A::r8 , A::xmm0);
Mike Klein35b97c32019-07-12 12:32:45 -05001392
1393 a.vmovd(A::xmm0, A::rax);
1394 a.vmovd(A::xmm8, A::rax);
Mike Klein8390f2e2020-04-15 17:03:08 -05001395 a.vmovd(A::xmm0, A::r8 );
Mike Klein35b97c32019-07-12 12:32:45 -05001396
Mike Kleinc15c9362020-04-16 11:10:36 -05001397 a.movb(A::Mem{A::rdx}, A::rax);
1398 a.movb(A::Mem{A::rdx}, A::r8 );
1399 a.movb(A::Mem{A::r8 }, A::rax);
Mike Klein35b97c32019-07-12 12:32:45 -05001400
Mike Kleinc15c9362020-04-16 11:10:36 -05001401 a.movb(A::rdx, A::Mem{A::rax});
1402 a.movb(A::rdx, A::Mem{A::r8 });
1403 a.movb(A::r8 , A::Mem{A::rax});
1404
1405 a.movb(A::rdx, 12);
1406 a.movb(A::rax, 4);
1407 a.movb(A::r8 , -1);
1408
1409 a.movb(A::Mem{A::rdx}, 12);
1410 a.movb(A::Mem{A::rax}, 4);
1411 a.movb(A::Mem{A::r8 }, -1);
1412 },{
1413 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1414 0x49,0x0f,0xb6,0x00,
1415 0x4c,0x0f,0xb6,0x06,
1416 0x4c,0x0f,0xb6,0x46, 12,
1417 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1418
1419 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1420 0x49,0x0f,0xb7,0x00,
1421 0x4c,0x0f,0xb7,0x06,
1422 0x4c,0x0f,0xb7,0x46, 12,
1423 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
Mike Kleincb511042020-04-13 13:12:17 -05001424
Mike Klein35b97c32019-07-12 12:32:45 -05001425 0xc5,0xf9,0x7e,0x00,
1426 0xc5,0x79,0x7e,0x00,
1427 0xc4,0xc1,0x79,0x7e,0x00,
1428
1429 0xc5,0xf9,0x6e,0x00,
1430 0xc5,0x79,0x6e,0x00,
1431 0xc4,0xc1,0x79,0x6e,0x00,
1432
Mike Klein93d3fab2020-01-14 10:46:44 -06001433 0xc5,0xf9,0x6e,0x04,0x88,
1434 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1435 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1436
Mike Klein35b97c32019-07-12 12:32:45 -05001437 0xc5,0xf9,0x7e,0xc0,
1438 0xc5,0x79,0x7e,0xc0,
1439 0xc4,0xc1,0x79,0x7e,0xc0,
1440
1441 0xc5,0xf9,0x6e,0xc0,
1442 0xc5,0x79,0x6e,0xc0,
1443 0xc4,0xc1,0x79,0x6e,0xc0,
1444
Mike Kleinc15c9362020-04-16 11:10:36 -05001445 0x48 ,0x88, 0x02,
1446 0x4c, 0x88, 0x02,
1447 0x49, 0x88, 0x00,
1448
1449 0x48 ,0x8a, 0x10,
1450 0x49, 0x8a, 0x10,
1451 0x4c, 0x8a, 0x00,
1452
1453 0x48, 0xc6, 0xc2, 0x0c,
1454 0x48, 0xc6, 0xc0, 0x04,
1455 0x49, 0xc6, 0xc0, 0xff,
1456
1457 0x48, 0xc6, 0x02, 0x0c,
1458 0x48, 0xc6, 0x00, 0x04,
1459 0x49, 0xc6, 0x00, 0xff,
Mike Klein35b97c32019-07-12 12:32:45 -05001460 });
1461
1462 test_asm(r, [&](A& a) {
Mike Klein8390f2e2020-04-15 17:03:08 -05001463 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1464 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
Mike Klein52010b72019-08-02 11:18:00 -05001465
Mike Klein8390f2e2020-04-15 17:03:08 -05001466 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
1467 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
Mike Klein35b97c32019-07-12 12:32:45 -05001468
Mike Klein21e85eb2020-04-17 13:57:13 -05001469 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1470 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1471
1472 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1473 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1474
Mike Klein8390f2e2020-04-15 17:03:08 -05001475 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1476 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein95529e82019-08-02 11:43:43 -05001477
Mike Klein8390f2e2020-04-15 17:03:08 -05001478 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1479 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
Mike Klein35b97c32019-07-12 12:32:45 -05001480 },{
Mike Klein52010b72019-08-02 11:18:00 -05001481 0xc5,0xb9, 0xc4, 0x0e, 4,
1482 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1483
Mike Klein35b97c32019-07-12 12:32:45 -05001484 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1485 0xc4,0x43,0x71, 0x20, 0x00, 12,
1486
Mike Klein21e85eb2020-04-17 13:57:13 -05001487 0xc4,0x63,0x7d,0x39,0xc1, 1,
1488 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1489
1490 0xc4,0x63,0x79,0x16,0x06, 3,
1491 0xc4,0xc3,0x79,0x16,0x08, 2,
1492
Mike Klein95529e82019-08-02 11:43:43 -05001493 0xc4,0x63,0x79, 0x15, 0x06, 7,
1494 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1495
Mike Klein35b97c32019-07-12 12:32:45 -05001496 0xc4,0x63,0x79, 0x14, 0x06, 7,
1497 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1498 });
1499
1500 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001501 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1502 },{
1503 0xc5, 0x9d, 0xdf, 0xda,
1504 });
Mike Klein9f4df802019-06-24 18:47:16 -04001505
Mike Kleind4546d62019-07-30 12:15:40 -05001506 test_asm(r, [&](A& a) {
Mike Klein9bb886732020-04-13 16:50:39 -05001507 A::Label l;
1508 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1509
1510 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1511 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1512 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1513
1514 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1515 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1516
1517 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1518 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1519 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1520 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1521 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1522
1523 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1524 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1525 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1526
Mike Kleind4546d62019-07-30 12:15:40 -05001527 a.vcvttps2dq(A::ymm3, A::ymm2);
1528 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001529 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001530 a.vsqrtps (A::ymm3, A::ymm2);
Mike Klein9bb886732020-04-13 16:50:39 -05001531 a.label(&l);
Mike Kleind4546d62019-07-30 12:15:40 -05001532 },{
1533 0xc5,0xfd,0x6f,0xda,
Mike Klein9bb886732020-04-13 16:50:39 -05001534
1535 0xc5,0xfd,0x6f,0x1e,
1536 0xc5,0xfd,0x6f,0x1c,0x24,
1537 0xc4,0xc1,0x7d,0x6f,0x1b,
1538
1539 0xc5,0xfd,0x6f,0x5e,0x04,
1540 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1541
1542 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1543 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1544 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1545 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1546 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1547
1548 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1549 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1550
1551 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1552
Mike Kleind4546d62019-07-30 12:15:40 -05001553 0xc5,0xfe,0x5b,0xda,
1554 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001555 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001556 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001557 });
1558
Mike Kleinbeaa1082020-01-13 14:04:18 -06001559 test_asm(r, [&](A& a) {
Mike Klein4d680cd2020-07-15 09:58:51 -05001560 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1561 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1562
1563 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1564 a.vcvtph2ps(A::ymm2, A::xmm3);
1565 },{
1566 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1567 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1568
1569 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1570 0xc4,0xe2,0x7d,0x13,0xd3,
1571 });
1572
1573 test_asm(r, [&](A& a) {
Mike Kleinbeaa1082020-01-13 14:04:18 -06001574 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1575 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1576 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1577 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1578 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1579 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1580 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1581 },{
1582 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1583 0xc4,0xe2,0x75,0x92,0x04,0x10,
1584 0xc4,0x62,0x75,0x92,0x14,0x10,
1585 0xc4,0xa2,0x75,0x92,0x04,0x20,
1586 0xc4,0xc2,0x75,0x92,0x04,0x11,
1587 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1588 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1589 });
1590
Mike Kleinc322f632020-01-13 16:18:58 -06001591 test_asm(r, [&](A& a) {
Mike Kleinc15c9362020-04-16 11:10:36 -05001592 a.mov(A::rax, A::Mem{A::rdi, 0});
1593 a.mov(A::rax, A::Mem{A::rdi, 1});
1594 a.mov(A::rax, A::Mem{A::rdi, 512});
1595 a.mov(A::r15, A::Mem{A::r13, 42});
1596 a.mov(A::rax, A::Mem{A::r13, 42});
1597 a.mov(A::r15, A::Mem{A::rax, 42});
1598 a.mov(A::rax, 1);
1599 a.mov(A::rax, A::rcx);
Mike Kleinc322f632020-01-13 16:18:58 -06001600 },{
1601 0x48, 0x8b, 0x07,
1602 0x48, 0x8b, 0x47, 0x01,
1603 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1604 0x4d, 0x8b, 0x7d, 0x2a,
1605 0x49, 0x8b, 0x45, 0x2a,
1606 0x4c, 0x8b, 0x78, 0x2a,
Mike Kleinc15c9362020-04-16 11:10:36 -05001607 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1608 0x48, 0x89, 0xc8,
Mike Kleinc322f632020-01-13 16:18:58 -06001609 });
1610
Mike Klein9f4df802019-06-24 18:47:16 -04001611 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1612
1613 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001614 a.and16b(A::v4, A::v3, A::v1);
1615 a.orr16b(A::v4, A::v3, A::v1);
1616 a.eor16b(A::v4, A::v3, A::v1);
1617 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001618 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001619 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001620
1621 a.add4s(A::v4, A::v3, A::v1);
1622 a.sub4s(A::v4, A::v3, A::v1);
1623 a.mul4s(A::v4, A::v3, A::v1);
1624
Mike Klein97afd2e2019-10-16 14:11:27 -05001625 a.cmeq4s(A::v4, A::v3, A::v1);
1626 a.cmgt4s(A::v4, A::v3, A::v1);
1627
Mike Klein65809142019-06-25 09:44:02 -04001628 a.sub8h(A::v4, A::v3, A::v1);
1629 a.mul8h(A::v4, A::v3, A::v1);
1630
Mike Klein9f4df802019-06-24 18:47:16 -04001631 a.fadd4s(A::v4, A::v3, A::v1);
1632 a.fsub4s(A::v4, A::v3, A::v1);
1633 a.fmul4s(A::v4, A::v3, A::v1);
1634 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001635 a.fmin4s(A::v4, A::v3, A::v1);
1636 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein7c0332c2020-03-05 14:18:04 -06001637 a.fneg4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001638
Mike Klein65809142019-06-25 09:44:02 -04001639 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001640 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001641
1642 a.fcmeq4s(A::v4, A::v3, A::v1);
1643 a.fcmgt4s(A::v4, A::v3, A::v1);
1644 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001645 },{
Mike Klein65809142019-06-25 09:44:02 -04001646 0x64,0x1c,0x21,0x4e,
1647 0x64,0x1c,0xa1,0x4e,
1648 0x64,0x1c,0x21,0x6e,
1649 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001650 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001651 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001652
1653 0x64,0x84,0xa1,0x4e,
1654 0x64,0x84,0xa1,0x6e,
1655 0x64,0x9c,0xa1,0x4e,
1656
Mike Klein97afd2e2019-10-16 14:11:27 -05001657 0x64,0x8c,0xa1,0x6e,
1658 0x64,0x34,0xa1,0x4e,
1659
Mike Klein65809142019-06-25 09:44:02 -04001660 0x64,0x84,0x61,0x6e,
1661 0x64,0x9c,0x61,0x4e,
1662
Mike Klein9f4df802019-06-24 18:47:16 -04001663 0x64,0xd4,0x21,0x4e,
1664 0x64,0xd4,0xa1,0x4e,
1665 0x64,0xdc,0x21,0x6e,
1666 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001667 0x64,0xf4,0xa1,0x4e,
1668 0x64,0xf4,0x21,0x4e,
Mike Klein7c0332c2020-03-05 14:18:04 -06001669 0x64,0xf8,0xa0,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001670
Mike Klein65809142019-06-25 09:44:02 -04001671 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001672 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001673
1674 0x64,0xe4,0x21,0x4e,
1675 0x64,0xe4,0xa1,0x6e,
1676 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001677 });
1678
1679 test_asm(r, [&](A& a) {
1680 a.shl4s(A::v4, A::v3, 0);
1681 a.shl4s(A::v4, A::v3, 1);
1682 a.shl4s(A::v4, A::v3, 8);
1683 a.shl4s(A::v4, A::v3, 16);
1684 a.shl4s(A::v4, A::v3, 31);
1685
1686 a.sshr4s(A::v4, A::v3, 1);
1687 a.sshr4s(A::v4, A::v3, 8);
1688 a.sshr4s(A::v4, A::v3, 31);
1689
1690 a.ushr4s(A::v4, A::v3, 1);
1691 a.ushr4s(A::v4, A::v3, 8);
1692 a.ushr4s(A::v4, A::v3, 31);
1693
1694 a.ushr8h(A::v4, A::v3, 1);
1695 a.ushr8h(A::v4, A::v3, 8);
1696 a.ushr8h(A::v4, A::v3, 15);
1697 },{
1698 0x64,0x54,0x20,0x4f,
1699 0x64,0x54,0x21,0x4f,
1700 0x64,0x54,0x28,0x4f,
1701 0x64,0x54,0x30,0x4f,
1702 0x64,0x54,0x3f,0x4f,
1703
1704 0x64,0x04,0x3f,0x4f,
1705 0x64,0x04,0x38,0x4f,
1706 0x64,0x04,0x21,0x4f,
1707
1708 0x64,0x04,0x3f,0x6f,
1709 0x64,0x04,0x38,0x6f,
1710 0x64,0x04,0x21,0x6f,
1711
1712 0x64,0x04,0x1f,0x6f,
1713 0x64,0x04,0x18,0x6f,
1714 0x64,0x04,0x11,0x6f,
1715 });
1716
1717 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001718 a.sli4s(A::v4, A::v3, 0);
1719 a.sli4s(A::v4, A::v3, 1);
1720 a.sli4s(A::v4, A::v3, 8);
1721 a.sli4s(A::v4, A::v3, 16);
1722 a.sli4s(A::v4, A::v3, 31);
1723 },{
1724 0x64,0x54,0x20,0x6f,
1725 0x64,0x54,0x21,0x6f,
1726 0x64,0x54,0x28,0x6f,
1727 0x64,0x54,0x30,0x6f,
1728 0x64,0x54,0x3f,0x6f,
1729 });
1730
1731 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001732 a.scvtf4s (A::v4, A::v3);
1733 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001734 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001735 },{
1736 0x64,0xd8,0x21,0x4e,
1737 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001738 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001739 });
Mike Klein15a368d2019-06-26 10:21:12 -04001740
1741 test_asm(r, [&](A& a) {
Mike Kleinb8e041e2020-04-17 11:30:29 -05001742 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1743 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1744 a.strq(A::v1, A::sp); // str q1, [sp]
1745 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
1746 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1747 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
1748 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
1749 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1750 a.add (A::sp, A::sp, 32); // add sp, sp, #32
Mike Klein48e78242020-04-17 09:38:09 -05001751 },{
1752 0xff,0x83,0x00,0xd1,
Mike Kleinb8e041e2020-04-17 11:30:29 -05001753 0xe0,0x07,0x80,0x3d,
1754 0xe1,0x03,0x80,0x3d,
1755 0xe0,0x1b,0x00,0xbd,
1756 0xe0,0xbf,0x00,0x3d,
1757 0xe9,0xab,0x40,0x3d,
1758 0xe7,0x2b,0x40,0xbd,
1759 0xe5,0x03,0xc2,0x3d,
Mike Klein48e78242020-04-17 09:38:09 -05001760 0xff,0x83,0x00,0x91,
1761 });
1762
1763 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001764 a.brk(0);
1765 a.brk(65535);
1766
Mike Klein15a368d2019-06-26 10:21:12 -04001767 a.ret(A::x30); // Conventional ret using link register.
1768 a.ret(A::x13); // Can really return using any register if we like.
1769
1770 a.add(A::x2, A::x2, 4);
1771 a.add(A::x3, A::x2, 32);
1772
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001773 a.sub(A::x2, A::x2, 4);
1774 a.sub(A::x3, A::x2, 32);
1775
Mike Klein15a368d2019-06-26 10:21:12 -04001776 a.subs(A::x2, A::x2, 4);
1777 a.subs(A::x3, A::x2, 32);
1778
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001779 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1780 a.cmp(A::x2, 4);
1781
Mike Kleinc74db792020-05-11 11:57:12 -05001782 A::Label l;
1783 a.label(&l);
Mike Klein65c10b52019-07-12 09:22:21 -05001784 a.bne(&l);
1785 a.bne(&l);
1786 a.blt(&l);
1787 a.b(&l);
1788 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001789 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001790 },{
Mike Klein37be7712019-11-13 13:19:01 -06001791 0x00,0x00,0x20,0xd4,
1792 0xe0,0xff,0x3f,0xd4,
1793
Mike Klein15a368d2019-06-26 10:21:12 -04001794 0xc0,0x03,0x5f,0xd6,
1795 0xa0,0x01,0x5f,0xd6,
1796
1797 0x42,0x10,0x00,0x91,
1798 0x43,0x80,0x00,0x91,
1799
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001800 0x42,0x10,0x00,0xd1,
1801 0x43,0x80,0x00,0xd1,
1802
Mike Klein15a368d2019-06-26 10:21:12 -04001803 0x42,0x10,0x00,0xf1,
1804 0x43,0x80,0x00,0xf1,
1805
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001806 0x5f,0x10,0x00,0xf1,
1807 0x5f,0x10,0x00,0xf1,
1808
1809 0x01,0x00,0x00,0x54, // b.ne #0
1810 0xe1,0xff,0xff,0x54, // b.ne #-4
1811 0xcb,0xff,0xff,0x54, // b.lt #-8
1812 0xae,0xff,0xff,0x54, // b.al #-12
1813 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1814 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001815 });
Mike Kleine51632e2019-06-26 14:47:43 -04001816
Mike Kleince7b88c2019-07-11 14:06:40 -05001817 // Can we cbz() to a not-yet-defined label?
1818 test_asm(r, [&](A& a) {
1819 A::Label l;
1820 a.cbz(A::x2, &l);
1821 a.add(A::x3, A::x2, 32);
1822 a.label(&l);
1823 a.ret(A::x30);
1824 },{
1825 0x42,0x00,0x00,0xb4, // cbz x2, #8
1826 0x43,0x80,0x00,0x91, // add x3, x2, #32
1827 0xc0,0x03,0x5f,0xd6, // ret
1828 });
1829
1830 // If we start a label as a backward label,
1831 // can we redefine it to be a future label?
1832 // (Not sure this is useful... just want to test it works.)
1833 test_asm(r, [&](A& a) {
Mike Kleinc74db792020-05-11 11:57:12 -05001834 A::Label l1;
1835 a.label(&l1);
Mike Kleince7b88c2019-07-11 14:06:40 -05001836 a.add(A::x3, A::x2, 32);
1837 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1838
Mike Kleinc74db792020-05-11 11:57:12 -05001839 A::Label l2; // Start off the same...
1840 a.label(&l2);
Mike Kleince7b88c2019-07-11 14:06:40 -05001841 a.add(A::x3, A::x2, 32);
1842 a.cbz(A::x2, &l2); // Looks like this will go backward...
1843 a.add(A::x2, A::x2, 4);
1844 a.add(A::x3, A::x2, 32);
1845 a.label(&l2); // But no... actually forward! What a switcheroo!
1846 },{
1847 0x43,0x80,0x00,0x91, // add x3, x2, #32
1848 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1849
1850 0x43,0x80,0x00,0x91, // add x3, x2, #32
1851 0x62,0x00,0x00,0xb4, // cbz x2, #12
1852 0x42,0x10,0x00,0x91, // add x2, x2, #4
1853 0x43,0x80,0x00,0x91, // add x3, x2, #32
1854 });
1855
Mike Klein81d52672019-07-30 11:11:09 -05001856 // Loading from a label on ARM.
1857 test_asm(r, [&](A& a) {
1858 A::Label fore,aft;
1859 a.label(&fore);
1860 a.word(0x01234567);
1861 a.ldrq(A::v1, &fore);
1862 a.ldrq(A::v2, &aft);
1863 a.label(&aft);
1864 a.word(0x76543210);
1865 },{
1866 0x67,0x45,0x23,0x01,
1867 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1868 0x22,0x00,0x00,0x9c, // ldr q2, #4
1869 0x10,0x32,0x54,0x76,
1870 });
1871
Mike Kleine51632e2019-06-26 14:47:43 -04001872 test_asm(r, [&](A& a) {
1873 a.ldrq(A::v0, A::x8);
1874 a.strq(A::v0, A::x8);
1875 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001876 0x00,0x01,0xc0,0x3d,
1877 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001878 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001879
1880 test_asm(r, [&](A& a) {
1881 a.xtns2h(A::v0, A::v0);
1882 a.xtnh2b(A::v0, A::v0);
1883 a.strs (A::v0, A::x0);
1884
1885 a.ldrs (A::v0, A::x0);
1886 a.uxtlb2h(A::v0, A::v0);
1887 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001888
1889 a.uminv4s(A::v3, A::v4);
1890 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001891 },{
1892 0x00,0x28,0x61,0x0e,
1893 0x00,0x28,0x21,0x0e,
1894 0x00,0x00,0x00,0xbd,
1895
1896 0x00,0x00,0x40,0xbd,
1897 0x00,0xa4,0x08,0x2f,
1898 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001899
1900 0x83,0xa8,0xb1,0x6e,
1901 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001902 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001903
1904 test_asm(r, [&](A& a) {
1905 a.ldrb(A::v0, A::x8);
1906 a.strb(A::v0, A::x8);
1907 },{
1908 0x00,0x01,0x40,0x3d,
1909 0x00,0x01,0x00,0x3d,
1910 });
Mike Klein81d52672019-07-30 11:11:09 -05001911
1912 test_asm(r, [&](A& a) {
1913 a.tbl(A::v0, A::v1, A::v2);
1914 },{
1915 0x20,0x00,0x02,0x4e,
1916 });
Mike Klein05642042019-06-18 12:16:06 -05001917}
Mike Reedbcb46c02020-03-23 17:51:01 -04001918
1919DEF_TEST(SkVM_approx_math, r) {
1920 auto eval = [](int N, float values[], auto fn) {
1921 skvm::Builder b;
1922 skvm::Arg inout = b.varying<float>();
1923
1924 b.storeF(inout, fn(&b, b.loadF(inout)));
1925
1926 b.done().eval(N, values);
1927 };
1928
1929 auto compare = [r](int N, const float values[], const float expected[]) {
1930 for (int i = 0; i < N; ++i) {
1931 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1932 }
1933 };
1934
1935 // log2
1936 {
1937 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1938 constexpr int N = SK_ARRAY_COUNT(values);
1939 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1940 return b->approx_log2(v);
1941 });
1942 const float expected[] = {-2, -1, 0, 1, 2, 3};
1943 compare(N, values, expected);
1944 }
1945
1946 // pow2
1947 {
1948 float values[] = {-2, -1, 0, 1, 2, 3};
1949 constexpr int N = SK_ARRAY_COUNT(values);
1950 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1951 return b->approx_pow2(v);
1952 });
1953 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
1954 compare(N, values, expected);
1955 }
1956
1957 // powf -- x^0.5
1958 {
1959 float bases[] = {0, 1, 4, 9, 16};
1960 constexpr int N = SK_ARRAY_COUNT(bases);
1961 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
1962 return b->approx_powf(base, b->splat(0.5f));
1963 });
1964 const float expected[] = {0, 1, 2, 3, 4};
1965 compare(N, bases, expected);
1966 }
1967 // powf -- 3^x
1968 {
1969 float exps[] = {-2, -1, 0, 1, 2};
1970 constexpr int N = SK_ARRAY_COUNT(exps);
1971 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
1972 return b->approx_powf(b->splat(3.0f), exp);
1973 });
1974 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
1975 compare(N, exps, expected);
1976 }
Mike Reed82ff25e2020-04-07 13:51:41 -04001977
Mike Reedd468a162020-04-11 14:14:00 -04001978 auto test = [r](float arg, float expected, float tolerance, auto prog) {
Mike Reed82ff25e2020-04-07 13:51:41 -04001979 skvm::Builder b;
1980 skvm::Arg inout = b.varying<float>();
1981 b.storeF(inout, prog(b.loadF(inout)));
Mike Reedd468a162020-04-11 14:14:00 -04001982 float actual = arg;
1983 b.done().eval(1, &actual);
Mike Reed82ff25e2020-04-07 13:51:41 -04001984
Mike Reedd468a162020-04-11 14:14:00 -04001985 float err = std::abs(actual - expected);
Mike Reed801ba0d2020-04-10 12:37:36 -04001986
1987 if (err > tolerance) {
Mike Reedd468a162020-04-11 14:14:00 -04001988 // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
Mike Reed1b84ef22020-04-13 17:56:24 -04001989 REPORTER_ASSERT(r, true);
Mike Reed801ba0d2020-04-10 12:37:36 -04001990 }
Mike Reed1b84ef22020-04-13 17:56:24 -04001991 return err;
1992 };
1993
1994 auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
1995 skvm::Builder b;
1996 skvm::Arg in0 = b.varying<float>();
1997 skvm::Arg in1 = b.varying<float>();
1998 skvm::Arg out = b.varying<float>();
1999 b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2000 float actual;
2001 b.done().eval(1, &arg0, &arg1, &actual);
2002
2003 float err = std::abs(actual - expected);
2004
2005 if (err > tolerance) {
2006 // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2007 REPORTER_ASSERT(r, true);
2008 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002009 return err;
Mike Reed82ff25e2020-04-07 13:51:41 -04002010 };
2011
Mike Reed801ba0d2020-04-10 12:37:36 -04002012 // sine, cosine, tangent
Mike Reed82ff25e2020-04-07 13:51:41 -04002013 {
2014 constexpr float P = SK_ScalarPI;
Mike Reed801ba0d2020-04-10 12:37:36 -04002015 constexpr float tol = 0.00175f;
Mike Reed82ff25e2020-04-07 13:51:41 -04002016 for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2017 test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2018 return approx_sin(x);
2019 });
2020 test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2021 return approx_cos(x);
2022 });
2023 }
Mike Reed801ba0d2020-04-10 12:37:36 -04002024
2025 // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2026 // so bring in the domain a little.
2027 constexpr float eps = 0.16f;
2028 float err = 0;
2029 for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2030 err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2031 return approx_tan(x);
2032 });
2033 // try again with some multiples of P, to check our periodicity
2034 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2035 return approx_tan(x + 3*P);
2036 });
2037 test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2038 return approx_tan(x - 3*P);
2039 });
2040 }
Mike Reedd468a162020-04-11 14:14:00 -04002041 if (0) { SkDebugf("tan error %g\n", err); }
2042 }
2043
2044 // asin, acos, atan
2045 {
2046 constexpr float tol = 0.00175f;
2047 float err = 0;
2048 for (float x = -1; x <= 1; x += 1.0f/64) {
2049 err += test(x, asin(x), tol, [](skvm::F32 x) {
2050 return approx_asin(x);
2051 });
2052 test(x, acos(x), tol, [](skvm::F32 x) {
2053 return approx_acos(x);
2054 });
Mike Reed801ba0d2020-04-10 12:37:36 -04002055 }
Mike Reedd468a162020-04-11 14:14:00 -04002056 if (0) { SkDebugf("asin error %g\n", err); }
2057
2058 err = 0;
Mike Reed1b84ef22020-04-13 17:56:24 -04002059 for (float x = -10; x <= 10; x += 1.0f/16) {
Mike Reedd468a162020-04-11 14:14:00 -04002060 err += test(x, atan(x), tol, [](skvm::F32 x) {
2061 return approx_atan(x);
2062 });
2063 }
2064 if (0) { SkDebugf("atan error %g\n", err); }
Mike Reed1b84ef22020-04-13 17:56:24 -04002065
2066 for (float y = -3; y <= 3; y += 1) {
2067 for (float x = -3; x <= 3; x += 1) {
2068 err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
Mike Klein962020f2020-05-01 10:35:18 -05002069 return approx_atan2(y,x);
Mike Reed1b84ef22020-04-13 17:56:24 -04002070 });
2071 }
2072 }
2073 if (0) { SkDebugf("atan2 error %g\n", err); }
Mike Reed82ff25e2020-04-07 13:51:41 -04002074 }
Mike Reedbcb46c02020-03-23 17:51:01 -04002075}
Mike Klein210288f2020-04-08 11:31:07 -05002076
2077DEF_TEST(SkVM_min_max, r) {
2078 // min() and max() have subtle behavior when one argument is NaN and
2079 // the other isn't. It's not sound to blindly swap their arguments.
2080 //
2081 // All backends must behave like std::min() and std::max(), which are
2082 //
2083 // min(x,y) = y<x ? y : x
2084 // max(x,y) = x<y ? y : x
2085
2086 // ±NaN, ±0, ±1, ±inf
2087 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2088 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2089
2090 float f[8];
2091 memcpy(f, bits, sizeof(bits));
2092
2093 auto identical = [&](float x, float y) {
2094 uint32_t X,Y;
2095 memcpy(&X, &x, 4);
2096 memcpy(&Y, &y, 4);
2097 return X == Y;
2098 };
2099
2100 // Test min/max with non-constant x, non-constant y.
2101 // (Whether x and y are varying or uniform shouldn't make any difference.)
2102 {
2103 skvm::Builder b;
2104 {
2105 skvm::Arg src = b.varying<float>(),
2106 mn = b.varying<float>(),
2107 mx = b.varying<float>();
2108
2109 skvm::F32 x = b.loadF(src),
2110 y = b.uniformF(b.uniform(), 0);
2111
2112 b.storeF(mn, b.min(x,y));
2113 b.storeF(mx, b.max(x,y));
2114 }
2115
Mike Klein10fc1e62020-04-13 11:57:05 -05002116 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002117 float mn[8], mx[8];
2118 for (int i = 0; i < 8; i++) {
2119 // min() and max() everything with f[i].
2120 program.eval(8, f,mn,mx, &f[i]);
2121
2122 for (int j = 0; j < 8; j++) {
2123 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2124 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2125 }
2126 }
2127 });
2128 }
2129
2130 // Test each with constant on the right.
2131 for (int i = 0; i < 8; i++) {
2132 skvm::Builder b;
2133 {
2134 skvm::Arg src = b.varying<float>(),
2135 mn = b.varying<float>(),
2136 mx = b.varying<float>();
2137
2138 skvm::F32 x = b.loadF(src),
2139 y = b.splat(f[i]);
2140
2141 b.storeF(mn, b.min(x,y));
2142 b.storeF(mx, b.max(x,y));
2143 }
2144
Mike Klein10fc1e62020-04-13 11:57:05 -05002145 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002146 float mn[8], mx[8];
2147 program.eval(8, f,mn,mx);
2148 for (int j = 0; j < 8; j++) {
2149 REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2150 REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2151 }
2152 });
2153 }
2154
2155 // Test each with constant on the left.
2156 for (int i = 0; i < 8; i++) {
2157 skvm::Builder b;
2158 {
2159 skvm::Arg src = b.varying<float>(),
2160 mn = b.varying<float>(),
2161 mx = b.varying<float>();
2162
2163 skvm::F32 x = b.splat(f[i]),
2164 y = b.loadF(src);
2165
2166 b.storeF(mn, b.min(x,y));
2167 b.storeF(mx, b.max(x,y));
2168 }
2169
Mike Klein10fc1e62020-04-13 11:57:05 -05002170 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
Mike Klein210288f2020-04-08 11:31:07 -05002171 float mn[8], mx[8];
2172 program.eval(8, f,mn,mx);
2173 for (int j = 0; j < 8; j++) {
2174 REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2175 REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2176 }
2177 });
2178 }
2179}
Mike Klein4d680cd2020-07-15 09:58:51 -05002180
2181DEF_TEST(SkVM_halfs, r) {
2182 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2183 0xc400,0xb800,0xbc00,0xc000};
2184 const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2185 -4.0f,-0.5f,-1.0f,-2.0f};
2186 {
2187 skvm::Builder b;
2188 skvm::Arg src = b.varying<uint16_t>(),
2189 dst = b.varying<float>();
2190 b.storeF(dst, b.from_half(b.load16(src)));
2191
2192 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2193 float dst[8];
2194 program.eval(8, hs, dst);
2195 for (int i = 0; i < 8; i++) {
2196 REPORTER_ASSERT(r, dst[i] == fs[i]);
2197 }
2198 });
2199 }
2200 {
2201 skvm::Builder b;
2202 skvm::Arg src = b.varying<float>(),
2203 dst = b.varying<uint16_t>();
2204 b.store16(dst, b.to_half(b.loadF(src)));
2205
2206 test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
2207 uint16_t dst[8];
2208 program.eval(8, fs, dst);
2209 for (int i = 0; i < 8; i++) {
2210 REPORTER_ASSERT(r, dst[i] == hs[i]);
2211 }
2212 });
2213 }
2214}