blob: 43cc4e50e2c580db67447b89a01f80edfd80e99e [file] [log] [blame]
/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "include/core/SkColorPriv.h"
9#include "include/private/SkColorData.h"
Mike Klein238105b2020-03-04 17:05:32 -060010#include "src/core/SkCpu.h"
Mike Klein3f7c8652019-11-07 10:33:56 -060011#include "src/core/SkMSAN.h"
Mike Klein68c50d02019-05-29 12:57:54 -050012#include "src/core/SkVM.h"
13#include "tests/Test.h"
Mike Klein267f5072019-06-03 16:27:46 -050014#include "tools/Resources.h"
Mike Klein7b7077c2019-06-03 17:10:59 -050015#include "tools/SkVMBuilders.h"
Mike Klein68c50d02019-05-29 12:57:54 -050016
Mike Klein7b7077c2019-06-03 17:10:59 -050017using Fmt = SrcoverBuilder_F32::Fmt;
Mike Klein68c50d02019-05-29 12:57:54 -050018const char* fmt_name(Fmt fmt) {
19 switch (fmt) {
Mike Klein7b7077c2019-06-03 17:10:59 -050020 case Fmt::A8: return "A8";
21 case Fmt::G8: return "G8";
22 case Fmt::RGBA_8888: return "RGBA_8888";
Mike Klein68c50d02019-05-29 12:57:54 -050023 }
24 return "";
25}
26
Mike Klein6b4143e2019-09-18 11:49:29 -050027static void dump(skvm::Builder& builder, SkWStream* o) {
28 skvm::Program program = builder.done();
29 builder.dump(o);
30 o->writeText("\n");
31 program.dump(o);
32 o->writeText("\n");
33}
Mike Klein7e650762019-07-02 15:21:11 -050034
Mike Kleinb5a30762019-10-16 10:11:56 -050035// TODO: I'd like this to go away and have every test in here run both JIT and interpreter.
Mike Klein9977efa2019-07-15 12:22:36 -050036template <typename Fn>
Mike Kleinb5a30762019-10-16 10:11:56 -050037static void test_interpreter_only(skiatest::Reporter* r, skvm::Program&& program, Fn&& test) {
Mike Kleinb5a30762019-10-16 10:11:56 -050038 REPORTER_ASSERT(r, !program.hasJIT());
Mike Klein4e115262019-10-16 16:48:52 +000039 test((const skvm::Program&) program);
Mike Klein52435502019-10-16 10:11:56 -050040}
41
Mike Kleinb5a30762019-10-16 10:11:56 -050042template <typename Fn>
43static void test_jit_and_interpreter(skiatest::Reporter* r, skvm::Program&& program, Fn&& test) {
Mike Klein3f7c8652019-11-07 10:33:56 -060044 static const bool can_jit = []{
45 // This is about the simplest program we can write, setting an int buffer to a constant.
46 // If this can't JIT, the platform does not support JITing.
47 skvm::Builder b;
48 b.store32(b.varying<int>(), b.splat(42));
49 skvm::Program p = b.done();
50 return p.hasJIT();
51 }();
52
53 if (can_jit) {
Mike Kleinb5a30762019-10-16 10:11:56 -050054 REPORTER_ASSERT(r, program.hasJIT());
55 test((const skvm::Program&) program);
56 program.dropJIT();
57 }
Mike Kleinb5a30762019-10-16 10:11:56 -050058 test_interpreter_only(r, std::move(program), std::move(test));
59}
60
61
Mike Klein68c50d02019-05-29 12:57:54 -050062DEF_TEST(SkVM, r) {
Mike Klein267f5072019-06-03 16:27:46 -050063 SkDynamicMemoryWStream buf;
Mike Klein7b7077c2019-06-03 17:10:59 -050064
65 // Write all combinations of SrcoverBuilder_F32
Mike Klein68c50d02019-05-29 12:57:54 -050066 for (int s = 0; s < 3; s++)
67 for (int d = 0; d < 3; d++) {
68 auto srcFmt = (Fmt)s,
69 dstFmt = (Fmt)d;
Mike Klein22ea7e92019-06-10 12:05:48 -050070 SrcoverBuilder_F32 builder{srcFmt, dstFmt};
Mike Klein68c50d02019-05-29 12:57:54 -050071
Mike Klein267f5072019-06-03 16:27:46 -050072 buf.writeText(fmt_name(srcFmt));
73 buf.writeText(" over ");
74 buf.writeText(fmt_name(dstFmt));
75 buf.writeText("\n");
Mike Klein62bccda2019-07-18 10:36:45 -050076 dump(builder, &buf);
Mike Klein267f5072019-06-03 16:27:46 -050077 }
Mike Klein68c50d02019-05-29 12:57:54 -050078
Mike Klein7b7077c2019-06-03 17:10:59 -050079 // Write the I32 Srcovers also.
80 {
Mike Kleinaab45b52019-07-02 15:39:23 -050081 SrcoverBuilder_I32_Naive builder;
Mike Klein397fc882019-06-20 11:37:10 -050082 buf.writeText("I32 (Naive) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050083 dump(builder, &buf);
Mike Klein397fc882019-06-20 11:37:10 -050084 }
85 {
Mike Kleinaab45b52019-07-02 15:39:23 -050086 SrcoverBuilder_I32 builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050087 buf.writeText("I32 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050088 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050089 }
90 {
Mike Kleinaab45b52019-07-02 15:39:23 -050091 SrcoverBuilder_I32_SWAR builder;
Mike Klein7b7077c2019-06-03 17:10:59 -050092 buf.writeText("I32 (SWAR) 8888 over 8888\n");
Mike Klein62bccda2019-07-18 10:36:45 -050093 dump(builder, &buf);
Mike Klein7b7077c2019-06-03 17:10:59 -050094 }
95
Mike Kleinf9963112019-08-08 15:13:25 -040096 {
Mike Kleind48488b2019-10-22 12:27:58 -050097 // Demonstrate the value of program reordering.
98 skvm::Builder b;
99 skvm::Arg sp = b.varying<int>(),
100 dp = b.varying<int>();
101
102 skvm::I32 byte = b.splat(0xff);
103
104 skvm::I32 src = b.load32(sp),
105 sr = b.extract(src, 0, byte),
106 sg = b.extract(src, 8, byte),
107 sb = b.extract(src, 16, byte),
108 sa = b.extract(src, 24, byte);
109
110 skvm::I32 dst = b.load32(dp),
111 dr = b.extract(dst, 0, byte),
112 dg = b.extract(dst, 8, byte),
113 db = b.extract(dst, 16, byte),
114 da = b.extract(dst, 24, byte);
115
116 skvm::I32 R = b.add(sr, dr),
117 G = b.add(sg, dg),
118 B = b.add(sb, db),
119 A = b.add(sa, da);
120
121 skvm::I32 rg = b.pack(R, G, 8),
122 ba = b.pack(B, A, 8),
123 rgba = b.pack(rg, ba, 16);
124
125 b.store32(dp, rgba);
126
127 dump(b, &buf);
128 }
129
Mike Klein238105b2020-03-04 17:05:32 -0600130 // Our checked in dump expectations assume we have FMA support.
131 const bool fma_supported =
132 #if defined(SK_CPU_X86)
133 SkCpu::Supports(SkCpu::HSW);
134 #elif defined(SK_CPU_ARM64)
135 true;
136 #else
137 false;
138 #endif
139 if (fma_supported) {
140 sk_sp<SkData> blob = buf.detachAsData();
141 {
Mike Klein267f5072019-06-03 16:27:46 -0500142
Mike Klein238105b2020-03-04 17:05:32 -0600143 sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
144 REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
145 if (expected) {
146 if (blob->size() != expected->size()
147 || 0 != memcmp(blob->data(), expected->data(), blob->size())) {
Mike Klein267f5072019-06-03 16:27:46 -0500148
Mike Klein238105b2020-03-04 17:05:32 -0600149 ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
150 expected->size(), expected->data(),
151 blob->size(), blob->data());
152 }
Mike Klein77163312019-06-04 13:35:32 -0500153
Mike Klein238105b2020-03-04 17:05:32 -0600154 SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
155 if (out.isValid()) {
156 out.write(blob->data(), blob->size());
157 }
Mike Klein77163312019-06-04 13:35:32 -0500158 }
Mike Klein68c50d02019-05-29 12:57:54 -0500159 }
160 }
161
Mike Klein9977efa2019-07-15 12:22:36 -0500162 auto test_8888 = [&](skvm::Program&& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500163 uint32_t src[9];
164 uint32_t dst[SK_ARRAY_COUNT(src)];
Mike Klein68c50d02019-05-29 12:57:54 -0500165
Mike Klein92ca3ba2020-01-08 15:49:47 -0600166 test_jit_and_interpreter(r, std::move(program), [&](const skvm::Program& program) {
Mike Klein9977efa2019-07-15 12:22:36 -0500167 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
168 src[i] = 0xbb007733;
169 dst[i] = 0xffaaccee;
Mike Klein3f593792019-06-12 12:54:52 -0500170 }
Mike Klein9977efa2019-07-15 12:22:36 -0500171
172 SkPMColor expected = SkPMSrcOver(src[0], dst[0]); // 0xff2dad73
173
174 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
175
176 // dst is probably 0xff2dad72.
177 for (auto got : dst) {
178 auto want = expected;
179 for (int i = 0; i < 4; i++) {
180 uint8_t d = got & 0xff,
181 w = want & 0xff;
Mike Klein37607d42019-07-18 10:17:28 -0500182 if (abs(d-w) >= 2) {
183 SkDebugf("d %02x, w %02x\n", d,w);
184 }
Mike Klein9977efa2019-07-15 12:22:36 -0500185 REPORTER_ASSERT(r, abs(d-w) < 2);
186 got >>= 8;
187 want >>= 8;
188 }
189 }
190 });
Mike Klein3f593792019-06-12 12:54:52 -0500191 };
Mike Klein68c50d02019-05-29 12:57:54 -0500192
Mike Klein37607d42019-07-18 10:17:28 -0500193 test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
194 test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
195 test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
196 test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
Mike Klein7b7077c2019-06-03 17:10:59 -0500197
Mike Klein92ca3ba2020-01-08 15:49:47 -0600198 test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500199 [&](const skvm::Program& program) {
Mike Klein3f593792019-06-12 12:54:52 -0500200 uint32_t src[9];
201 uint8_t dst[SK_ARRAY_COUNT(src)];
202
203 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
204 src[i] = 0xbb007733;
205 dst[i] = 0x42;
206 }
207
208 SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
209 0xff424242);
Mike Klein68c50d02019-05-29 12:57:54 -0500210
211 uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
212 SkGetPackedG32(over),
213 SkGetPackedB32(over));
Mike Klein3f593792019-06-12 12:54:52 -0500214 program.eval((int)SK_ARRAY_COUNT(src), src, dst);
Mike Klein68c50d02019-05-29 12:57:54 -0500215
Mike Klein3f593792019-06-12 12:54:52 -0500216 for (auto got : dst) {
217 REPORTER_ASSERT(r, abs(got-want) < 3);
218 }
Mike Klein9977efa2019-07-15 12:22:36 -0500219 });
Mike Klein68c50d02019-05-29 12:57:54 -0500220
Mike Kleinb5a30762019-10-16 10:11:56 -0500221 test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
Mike Klein9977efa2019-07-15 12:22:36 -0500222 [&](const skvm::Program& program) {
Mike Klein68c50d02019-05-29 12:57:54 -0500223 uint8_t src[256],
224 dst[256];
225 for (int i = 0; i < 256; i++) {
226 src[i] = 255 - i;
227 dst[i] = i;
228 }
229
230 program.eval(256, src, dst);
231
232 for (int i = 0; i < 256; i++) {
233 uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
234 SkPackARGB32( i, 0,0,0)));
235 REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
236 }
Mike Klein9977efa2019-07-15 12:22:36 -0500237 });
Mike Klein68c50d02019-05-29 12:57:54 -0500238}
Mike Klein81756e42019-06-12 11:36:28 -0500239
Mike Klein7542ab52020-04-02 08:50:16 -0500240DEF_TEST(SkVM_eliminate_dead_code, r) {
241 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400242 {
Mike Klein7542ab52020-04-02 08:50:16 -0500243 skvm::Arg arg = b.varying<int>();
244 skvm::I32 l = b.load32(arg);
245 skvm::I32 a = b.add(l, l);
246 b.add(a, b.splat(7));
247 }
Herb Derbyf20400e2020-03-18 16:11:25 -0400248
Mike Klein7542ab52020-04-02 08:50:16 -0500249 std::vector<skvm::Instruction> program = b.program();
250 REPORTER_ASSERT(r, program.size() == 4);
251
Mike Klein5b701e12020-04-02 10:34:24 -0500252 program = skvm::eliminate_dead_code(program);
Mike Klein7542ab52020-04-02 08:50:16 -0500253 REPORTER_ASSERT(r, program.size() == 0);
254}
255
256DEF_TEST(SkVM_Usage, r) {
257 skvm::Builder b;
Herb Derbyf20400e2020-03-18 16:11:25 -0400258 {
Mike Klein7542ab52020-04-02 08:50:16 -0500259 skvm::Arg arg = b.varying<int>(),
260 buf = b.varying<int>();
261 skvm::I32 l = b.load32(arg);
262 skvm::I32 a = b.add(l, l);
263 skvm::I32 s = b.add(a, b.splat(7));
264 b.store32(buf, s);
Herb Derbyf20400e2020-03-18 16:11:25 -0400265 }
Mike Klein7542ab52020-04-02 08:50:16 -0500266
Mike Kleinb7d87902020-04-02 10:14:35 -0500267 skvm::Usage usage{b.program()};
Mike Klein7542ab52020-04-02 08:50:16 -0500268 REPORTER_ASSERT(r, b.program()[0].op == skvm::Op::load32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500269 REPORTER_ASSERT(r, usage[0].size() == 2);
Mike Klein7542ab52020-04-02 08:50:16 -0500270 REPORTER_ASSERT(r, b.program()[1].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500271 REPORTER_ASSERT(r, usage[1].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500272 REPORTER_ASSERT(r, b.program()[2].op == skvm::Op::splat);
Mike Kleinb7d87902020-04-02 10:14:35 -0500273 REPORTER_ASSERT(r, usage[2].size() == 1);
Mike Klein7542ab52020-04-02 08:50:16 -0500274 REPORTER_ASSERT(r, b.program()[3].op == skvm::Op::add_i32);
Mike Kleinb7d87902020-04-02 10:14:35 -0500275 REPORTER_ASSERT(r, usage[3].size() == 1);
Herb Derbyf20400e2020-03-18 16:11:25 -0400276}
277
Mike Klein9fdadb92019-07-30 12:30:13 -0500278DEF_TEST(SkVM_Pointless, r) {
279 // Let's build a program with no memory arguments.
280 // It should all be pegged as dead code, but we should be able to "run" it.
281 skvm::Builder b;
282 {
283 b.add(b.splat(5.0f),
284 b.splat(4.0f));
285 }
286
Mike Kleinb5a30762019-10-16 10:11:56 -0500287 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein9fdadb92019-07-30 12:30:13 -0500288 for (int N = 0; N < 64; N++) {
289 program.eval(N);
290 }
291 });
292
Mike Kleined9b1f12020-02-06 13:02:32 -0600293 for (const skvm::OptimizedInstruction& inst : b.optimize()) {
Mike Klein0f61c122019-10-16 10:46:01 -0500294 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
Mike Klein9fdadb92019-07-30 12:30:13 -0500295 }
296}
297
Mike Kleinb6149312020-02-26 13:04:23 -0600298#if defined(SKVM_LLVM)
Mike Klein11efa182020-02-27 12:04:37 -0600299DEF_TEST(SkVM_LLVM_memset, r) {
Mike Kleinb6149312020-02-26 13:04:23 -0600300 skvm::Builder b;
301 b.store32(b.varying<int>(), b.splat(42));
302
303 skvm::Program p = b.done();
304 REPORTER_ASSERT(r, p.hasJIT());
305
Mike Klein7b3999e2020-02-27 10:07:53 -0600306 int buf[18];
307 buf[17] = 47;
308
309 p.eval(17, buf);
310 for (int i = 0; i < 17; i++) {
311 REPORTER_ASSERT(r, buf[i] == 42);
Mike Kleinb6149312020-02-26 13:04:23 -0600312 }
Mike Klein7b3999e2020-02-27 10:07:53 -0600313 REPORTER_ASSERT(r, buf[17] == 47);
Mike Kleinb6149312020-02-26 13:04:23 -0600314}
Mike Klein11efa182020-02-27 12:04:37 -0600315
316DEF_TEST(SkVM_LLVM_memcpy, r) {
317 skvm::Builder b;
318 {
319 auto src = b.varying<int>(),
320 dst = b.varying<int>();
321 b.store32(dst, b.load32(src));
322 }
323
324 skvm::Program p = b.done();
325 REPORTER_ASSERT(r, p.hasJIT());
326
327 int src[] = {1,2,3,4,5,6,7,8,9},
328 dst[] = {0,0,0,0,0,0,0,0,0};
329
330 p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
331 for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
332 REPORTER_ASSERT(r, dst[i] == src[i]);
333 }
334 size_t i = SK_ARRAY_COUNT(src)-1;
335 REPORTER_ASSERT(r, dst[i] == 0);
336}
Mike Kleinb6149312020-02-26 13:04:23 -0600337#endif
338
Mike Klein81756e42019-06-12 11:36:28 -0500339DEF_TEST(SkVM_LoopCounts, r) {
340 // Make sure we cover all the exact N we want.
341
Mike Klein9977efa2019-07-15 12:22:36 -0500342 // buf[i] += 1
343 skvm::Builder b;
Mike Klein5591fdf2019-07-30 09:44:30 -0500344 skvm::Arg arg = b.varying<int>();
Mike Klein9977efa2019-07-15 12:22:36 -0500345 b.store32(arg,
346 b.add(b.splat(1),
347 b.load32(arg)));
348
Mike Kleinb5a30762019-10-16 10:11:56 -0500349 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein9e2218a2019-07-19 11:13:42 -0500350 int buf[64];
351 for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
Mike Klein9977efa2019-07-15 12:22:36 -0500352 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
353 buf[i] = i;
354 }
355 program.eval(N, buf);
Mike Klein81756e42019-06-12 11:36:28 -0500356
Mike Klein9977efa2019-07-15 12:22:36 -0500357 for (int i = 0; i < N; i++) {
358 REPORTER_ASSERT(r, buf[i] == i+1);
359 }
360 for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
361 REPORTER_ASSERT(r, buf[i] == i);
362 }
Mike Klein9e2218a2019-07-19 11:13:42 -0500363 }
364 });
Mike Klein81756e42019-06-12 11:36:28 -0500365}
Mike Klein05642042019-06-18 12:16:06 -0500366
Mike Kleinb2b6a992020-01-13 16:34:30 -0600367DEF_TEST(SkVM_gather32, r) {
368 skvm::Builder b;
369 {
370 skvm::Arg uniforms = b.uniform(),
371 buf = b.varying<int>();
372 skvm::I32 x = b.load32(buf);
373 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
374 }
375
376#if defined(SK_CPU_X86)
377 test_jit_and_interpreter
378#else
379 test_interpreter_only
380#endif
381 (r, b.done(), [&](const skvm::Program& program) {
382 const int img[] = {12,34,56,78, 90,98,76,54};
383
384 int buf[20];
385 for (int i = 0; i < 20; i++) {
386 buf[i] = i;
387 }
388
389 struct Uniforms {
390 const int* img;
391 } uniforms{img};
392
393 program.eval(20, &uniforms, buf);
394 int i = 0;
395 REPORTER_ASSERT(r, buf[i] == 12); i++;
396 REPORTER_ASSERT(r, buf[i] == 34); i++;
397 REPORTER_ASSERT(r, buf[i] == 56); i++;
398 REPORTER_ASSERT(r, buf[i] == 78); i++;
399 REPORTER_ASSERT(r, buf[i] == 90); i++;
400 REPORTER_ASSERT(r, buf[i] == 98); i++;
401 REPORTER_ASSERT(r, buf[i] == 76); i++;
402 REPORTER_ASSERT(r, buf[i] == 54); i++;
403
404 REPORTER_ASSERT(r, buf[i] == 12); i++;
405 REPORTER_ASSERT(r, buf[i] == 34); i++;
406 REPORTER_ASSERT(r, buf[i] == 56); i++;
407 REPORTER_ASSERT(r, buf[i] == 78); i++;
408 REPORTER_ASSERT(r, buf[i] == 90); i++;
409 REPORTER_ASSERT(r, buf[i] == 98); i++;
410 REPORTER_ASSERT(r, buf[i] == 76); i++;
411 REPORTER_ASSERT(r, buf[i] == 54); i++;
412
413 REPORTER_ASSERT(r, buf[i] == 12); i++;
414 REPORTER_ASSERT(r, buf[i] == 34); i++;
415 REPORTER_ASSERT(r, buf[i] == 56); i++;
416 REPORTER_ASSERT(r, buf[i] == 78); i++;
417 });
418}
419
Mike Klein81d52672019-07-30 11:11:09 -0500420DEF_TEST(SkVM_gathers, r) {
421 skvm::Builder b;
422 {
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600423 skvm::Arg uniforms = b.uniform(),
424 buf32 = b.varying<int>(),
425 buf16 = b.varying<uint16_t>(),
426 buf8 = b.varying<uint8_t>();
Mike Klein81d52672019-07-30 11:11:09 -0500427
428 skvm::I32 x = b.load32(buf32);
429
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600430 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
431 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
432 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
Mike Klein81d52672019-07-30 11:11:09 -0500433 }
434
Mike Klein22c007d2020-02-28 11:38:58 -0600435#if defined(SKVM_LLVM)
436 test_jit_and_interpreter
437#else
438 test_interpreter_only
439#endif
440 (r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500441 const int img[] = {12,34,56,78, 90,98,76,54};
442
443 constexpr int N = 20;
444 int buf32[N];
445 uint16_t buf16[N];
446 uint8_t buf8 [N];
447
448 for (int i = 0; i < 20; i++) {
449 buf32[i] = i;
450 }
451
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600452 struct Uniforms {
453 const int* img;
454 } uniforms{img};
455
456 program.eval(N, &uniforms, buf32, buf16, buf8);
Mike Klein81d52672019-07-30 11:11:09 -0500457 int i = 0;
458 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
459 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
460 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
461 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
462 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
463 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
464 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++;
465 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
466
467 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
468 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
469 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++;
470 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
471 REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
472 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++;
473 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++;
474 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++;
475
476 REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
477 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++;
478 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++;
479 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++;
480 });
481}
482
483DEF_TEST(SkVM_bitops, r) {
484 skvm::Builder b;
485 {
486 skvm::Arg ptr = b.varying<int>();
487
488 skvm::I32 x = b.load32(ptr);
489
Mike Klein4067a942020-04-05 10:25:32 -0500490 x = b.bit_and (x, b.splat(0xf1)); // 0x40
491 x = b.bit_or (x, b.splat(0x80)); // 0xc0
492 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e
493 x = b.bit_clear(x, b.splat(0x30)); // 0x0e
Mike Klein81d52672019-07-30 11:11:09 -0500494
495 x = b.shl(x, 28); // 0xe000'0000
496 x = b.sra(x, 28); // 0xffff'fffe
497 x = b.shr(x, 1); // 0x7fff'ffff
498
499 b.store32(ptr, x);
500 }
501
Mike Klein92ca3ba2020-01-08 15:49:47 -0600502 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500503 int x = 0x42;
504 program.eval(1, &x);
505 REPORTER_ASSERT(r, x == 0x7fff'ffff);
506 });
507}
508
Mike Klein4067a942020-04-05 10:25:32 -0500509DEF_TEST(SkVM_select_is_NaN, r) {
510 skvm::Builder b;
511 {
512 skvm::Arg src = b.varying<float>(),
513 dst = b.varying<float>();
514
515 skvm::F32 x = b.loadF(src);
516 x = select(is_NaN(x), b.splat(0.0f)
517 , x);
518 b.storeF(dst, x);
519 }
520
521 std::vector<skvm::OptimizedInstruction> program = b.optimize();
522 REPORTER_ASSERT(r, program.size() == 4);
523 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
524 REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
525 REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
526 REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
527
528 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
529 // ±NaN, ±0, ±1, ±inf
530 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
531 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
532 uint32_t dst[SK_ARRAY_COUNT(src)];
533 program.eval(SK_ARRAY_COUNT(src), src, dst);
534
535 for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
536 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
537 }
538 });
539}
540
Mike Klein81d52672019-07-30 11:11:09 -0500541DEF_TEST(SkVM_f32, r) {
542 skvm::Builder b;
543 {
544 skvm::Arg arg = b.varying<float>();
545
Mike Reedf5ff4c22020-03-23 14:57:53 -0400546 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500547 y = b.add(x,x), // y = 2x
548 z = b.sub(y,x), // z = 2x-x = x
549 w = b.div(z,x); // w = x/x = 1
Mike Reedf5ff4c22020-03-23 14:57:53 -0400550 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500551 }
552
Mike Kleinb5a30762019-10-16 10:11:56 -0500553 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500554 float buf[] = { 1,2,3,4,5,6,7,8,9 };
555 program.eval(SK_ARRAY_COUNT(buf), buf);
556 for (float v : buf) {
557 REPORTER_ASSERT(r, v == 1.0f);
558 }
559 });
560}
561
562DEF_TEST(SkVM_cmp_i32, r) {
563 skvm::Builder b;
564 {
565 skvm::I32 x = b.load32(b.varying<int>());
566
567 auto to_bit = [&](int shift, skvm::I32 mask) {
568 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
569 };
570
571 skvm::I32 m = b.splat(0);
572 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
573 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
574 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
575 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
576 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
577 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
578
579 b.store32(b.varying<int>(), m);
580 }
Herb Derby5c5bd1a2020-02-28 11:00:36 -0600581#if defined(SKVM_LLVM)
582 test_jit_and_interpreter
583#else
584 test_interpreter_only
585#endif
586 (r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500587 int in[] = { 0,1,2,3,4,5,6,7,8,9 };
588 int out[SK_ARRAY_COUNT(in)];
589
590 program.eval(SK_ARRAY_COUNT(in), in, out);
591
592 REPORTER_ASSERT(r, out[0] == 0b001111);
593 REPORTER_ASSERT(r, out[1] == 0b001100);
594 REPORTER_ASSERT(r, out[2] == 0b001010);
595 REPORTER_ASSERT(r, out[3] == 0b001010);
596 REPORTER_ASSERT(r, out[4] == 0b000010);
597 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
598 REPORTER_ASSERT(r, out[i] == 0b110010);
599 }
600 });
601}
602
603DEF_TEST(SkVM_cmp_f32, r) {
604 skvm::Builder b;
605 {
Mike Reedf5ff4c22020-03-23 14:57:53 -0400606 skvm::F32 x = b.loadF(b.varying<float>());
Mike Klein81d52672019-07-30 11:11:09 -0500607
608 auto to_bit = [&](int shift, skvm::I32 mask) {
609 return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
610 };
611
612 skvm::I32 m = b.splat(0);
613 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
614 m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
615 m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
616 m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
617 m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
618 m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
619
620 b.store32(b.varying<int>(), m);
621 }
622
Mike Klein92ca3ba2020-01-08 15:49:47 -0600623 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500624 float in[] = { 0,1,2,3,4,5,6,7,8,9 };
625 int out[SK_ARRAY_COUNT(in)];
626
627 program.eval(SK_ARRAY_COUNT(in), in, out);
628
629 REPORTER_ASSERT(r, out[0] == 0b001111);
630 REPORTER_ASSERT(r, out[1] == 0b001100);
631 REPORTER_ASSERT(r, out[2] == 0b001010);
632 REPORTER_ASSERT(r, out[3] == 0b001010);
633 REPORTER_ASSERT(r, out[4] == 0b000010);
634 for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
635 REPORTER_ASSERT(r, out[i] == 0b110010);
636 }
637 });
638}
639
Mike Klein14548b92020-02-28 14:02:29 -0600640DEF_TEST(SkVM_index, r) {
641 skvm::Builder b;
642 b.store32(b.varying<int>(), b.index());
643
644#if defined(SKVM_LLVM) || defined(SK_CPU_X86)
645 test_jit_and_interpreter
646#else
647 test_interpreter_only
648#endif
649 (r, b.done(), [&](const skvm::Program& program) {
650 int buf[23];
651 program.eval(SK_ARRAY_COUNT(buf), buf);
652 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
653 REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
654 }
655 });
656}
657
Mike Klein81d52672019-07-30 11:11:09 -0500658DEF_TEST(SkVM_i16x2, r) {
659 skvm::Builder b;
660 {
661 skvm::Arg buf = b.varying<int>();
662
663 skvm::I32 x = b.load32(buf),
664 y = b.add_16x2(x,x), // y = 2x
665 z = b.mul_16x2(x,y), // z = 2x^2
666 w = b.sub_16x2(z,x), // w = x(2x-1)
667 v = b.shl_16x2(w,7), // These shifts will be a no-op
668 u = b.sra_16x2(v,7); // for all but x=12 and x=13.
669 b.store32(buf, u);
670 }
671
Mike Kleine96207a2020-02-28 13:20:06 -0600672#if defined(SKVM_LLVM)
673 test_jit_and_interpreter
674#else
675 test_interpreter_only
676#endif
677 (r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500678 uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };
679
680 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
681 for (int i = 0; i < 12; i++) {
682 REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
683 }
684 REPORTER_ASSERT(r, buf[12] == 0xff14); // 12*23 = 0x114
685 REPORTER_ASSERT(r, buf[13] == 0xff45); // 13*25 = 0x145
686 });
687}
688
689DEF_TEST(SkVM_cmp_i16, r) {
690 skvm::Builder b;
691 {
692 skvm::Arg buf = b.varying<int>();
693 skvm::I32 x = b.load32(buf);
694
695 auto to_bit = [&](int shift, skvm::I32 mask) {
696 return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
697 };
698
699 skvm::I32 m = b.splat(0);
700 m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
701 m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
702 m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
703 m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
704 m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
705 m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));
706
707 b.store32(buf, m);
708 }
709
Mike Kleine96207a2020-02-28 13:20:06 -0600710#if defined(SKVM_LLVM)
711 test_jit_and_interpreter
712#else
713 test_interpreter_only
714#endif
715 (r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500716 int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };
717
718 program.eval(SK_ARRAY_COUNT(buf)/2, buf);
719
720 REPORTER_ASSERT(r, buf[0] == 0b001111);
721 REPORTER_ASSERT(r, buf[1] == 0b001100);
722 REPORTER_ASSERT(r, buf[2] == 0b001010);
723 REPORTER_ASSERT(r, buf[3] == 0b001010);
724 REPORTER_ASSERT(r, buf[4] == 0b000010);
725 for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
726 REPORTER_ASSERT(r, buf[i] == 0b110010);
727 }
728 });
729}
730
731
Mike Klein4a131192019-07-19 13:56:41 -0500732DEF_TEST(SkVM_mad, r) {
733 // This program is designed to exercise the tricky corners of instruction
734 // and register selection for Op::mad_f32.
735
736 skvm::Builder b;
737 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500738 skvm::Arg arg = b.varying<int>();
Mike Klein4a131192019-07-19 13:56:41 -0500739
740 skvm::F32 x = b.to_f32(b.load32(arg)),
741 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
742 z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok.
743 w = b.mad(z,z,y), // w can alias z but not y.
744 v = b.mad(w,y,w); // Got to stop somewhere.
Mike Klein6e4aad92019-11-08 14:13:15 -0600745 b.store32(arg, b.trunc(v));
Mike Klein4a131192019-07-19 13:56:41 -0500746 }
747
Mike Kleinb5a30762019-10-16 10:11:56 -0500748 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein4a131192019-07-19 13:56:41 -0500749 int x = 2;
750 program.eval(1, &x);
751 // x = 2
752 // y = 2*2 + 2 = 6
753 // z = 6*6 + 2 = 38
754 // w = 38*38 + 6 = 1450
755 // v = 1450*6 + 1450 = 10150
756 REPORTER_ASSERT(r, x == 10150);
757 });
758}
759
Mike Klein7c0332c2020-03-05 14:18:04 -0600760DEF_TEST(SkVM_fms, r) {
761 // Create a pattern that can be peepholed into an Op::fms_f32.
762 skvm::Builder b;
763 {
764 skvm::Arg arg = b.varying<int>();
765
766 skvm::F32 x = b.to_f32(b.load32(arg)),
767 v = b.sub(b.mul(x, b.splat(2.0f)),
768 b.splat(1.0f));
769 b.store32(arg, b.trunc(v));
770 }
771
772 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
773 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
774 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
775
776 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
777 REPORTER_ASSERT(r, buf[i] = 2*i-1);
778 }
779 });
780}
781
782DEF_TEST(SkVM_fnma, r) {
783 // Create a pattern that can be peepholed into an Op::fnma_f32.
784 skvm::Builder b;
785 {
786 skvm::Arg arg = b.varying<int>();
787
788 skvm::F32 x = b.to_f32(b.load32(arg)),
789 v = b.sub(b.splat(1.0f),
790 b.mul(x, b.splat(2.0f)));
791 b.store32(arg, b.trunc(v));
792 }
793
794 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
795 int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
796 program.eval((int)SK_ARRAY_COUNT(buf), &buf);
797
798 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
799 REPORTER_ASSERT(r, buf[i] = 1-2*i);
800 }
801 });
802}
803
Mike Klein81d52672019-07-30 11:11:09 -0500804DEF_TEST(SkVM_madder, r) {
805 skvm::Builder b;
806 {
807 skvm::Arg arg = b.varying<float>();
808
Mike Reedf5ff4c22020-03-23 14:57:53 -0400809 skvm::F32 x = b.loadF(arg),
Mike Klein81d52672019-07-30 11:11:09 -0500810 y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y].
811 z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y].
812 w = b.mad(y,y,z);
Mike Reedf5ff4c22020-03-23 14:57:53 -0400813 b.storeF(arg, w);
Mike Klein81d52672019-07-30 11:11:09 -0500814 }
815
Mike Kleinb5a30762019-10-16 10:11:56 -0500816 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein81d52672019-07-30 11:11:09 -0500817 float x = 2.0f;
818 // y = 2*2 + 2 = 6
819 // z = 6*2 + 6 = 18
820 // w = 6*6 + 18 = 54
821 program.eval(1, &x);
822 REPORTER_ASSERT(r, x == 54.0f);
823 });
824}
825
Mike Kleinf22faaf2020-01-09 07:27:39 -0600826DEF_TEST(SkVM_floor, r) {
827 skvm::Builder b;
828 {
829 skvm::Arg arg = b.varying<float>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400830 b.storeF(arg, b.floor(b.loadF(arg)));
Mike Kleinf22faaf2020-01-09 07:27:39 -0600831 }
832
833#if defined(SK_CPU_X86)
834 test_jit_and_interpreter
835#else
836 test_interpreter_only
837#endif
838 (r, b.done(), [&](const skvm::Program& program) {
839 float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
840 float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
841 program.eval(SK_ARRAY_COUNT(buf), buf);
842 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
843 REPORTER_ASSERT(r, buf[i] == want[i]);
844 }
845 });
846}
847
Mike Klein5caf7de2020-03-12 11:05:46 -0500848DEF_TEST(SkVM_round, r) {
849 skvm::Builder b;
850 {
851 skvm::Arg src = b.varying<float>();
852 skvm::Arg dst = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -0400853 b.store32(dst, b.round(b.loadF(src)));
Mike Klein5caf7de2020-03-12 11:05:46 -0500854 }
855
856 // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
857 // We haven't explicitly guaranteed that here... it just probably is.
858 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
859 float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
860 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 };
861 int dst[SK_ARRAY_COUNT(buf)];
862
863 program.eval(SK_ARRAY_COUNT(buf), buf, dst);
864 for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
865 REPORTER_ASSERT(r, dst[i] == want[i]);
866 }
867 });
868}
869
Herb Derbyc02a41f2020-02-28 14:25:45 -0600870DEF_TEST(SkVM_min, r) {
871 skvm::Builder b;
872 {
873 skvm::Arg src1 = b.varying<float>();
874 skvm::Arg src2 = b.varying<float>();
875 skvm::Arg dst = b.varying<float>();
876
Mike Reedf5ff4c22020-03-23 14:57:53 -0400877 b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600878 }
879
880 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
881 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
882 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
883 float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
884 float d[SK_ARRAY_COUNT(s1)];
885 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
886 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
887 REPORTER_ASSERT(r, d[i] == want[i]);
888 }
889 });
890}
891
892DEF_TEST(SkVM_max, r) {
893 skvm::Builder b;
894 {
895 skvm::Arg src1 = b.varying<float>();
896 skvm::Arg src2 = b.varying<float>();
897 skvm::Arg dst = b.varying<float>();
898
Mike Reedf5ff4c22020-03-23 14:57:53 -0400899 b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
Herb Derbyc02a41f2020-02-28 14:25:45 -0600900 }
901
902 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
903 float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
904 float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f};
905 float want[] = { 0.0f, 2.0f, 4.0f, 1.0f, -1.0f};
906 float d[SK_ARRAY_COUNT(s1)];
907 program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
908 for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
909 REPORTER_ASSERT(r, d[i] == want[i]);
910 }
911 });
Herb Derbyfb4ff8d2020-02-28 11:59:10 -0600912}
913
Mike Kleinf98d0d32019-07-22 14:30:18 -0500914DEF_TEST(SkVM_hoist, r) {
915 // This program uses enough constants that it will fail to JIT if we hoist them.
916 // The JIT will try again without hoisting, and that'll just need 2 registers.
917 skvm::Builder b;
918 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500919 skvm::Arg arg = b.varying<int>();
Mike Kleinf98d0d32019-07-22 14:30:18 -0500920 skvm::I32 x = b.load32(arg);
921 for (int i = 0; i < 32; i++) {
922 x = b.add(x, b.splat(i));
923 }
924 b.store32(arg, x);
925 }
926
Mike Klein0f61c122019-10-16 10:46:01 -0500927 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Kleinf98d0d32019-07-22 14:30:18 -0500928 int x = 4;
929 program.eval(1, &x);
930 // x += 0 + 1 + 2 + 3 + ... + 30 + 31
931 // x += 496
932 REPORTER_ASSERT(r, x == 500);
933 });
934}
935
Mike Kleinb9944122019-08-02 12:22:39 -0500936DEF_TEST(SkVM_select, r) {
937 skvm::Builder b;
938 {
939 skvm::Arg buf = b.varying<int>();
940
941 skvm::I32 x = b.load32(buf);
942
943 x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
944
945 b.store32(buf, x);
946 }
947
Mike Klein97afd2e2019-10-16 14:11:27 -0500948 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Kleinb9944122019-08-02 12:22:39 -0500949 int buf[] = { 0,1,2,3,4,5,6,7,8 };
950 program.eval(SK_ARRAY_COUNT(buf), buf);
951 for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
952 REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
953 }
954 });
955}
956
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500957DEF_TEST(SkVM_NewOps, r) {
958 // Exercise a somewhat arbitrary set of new ops.
959 skvm::Builder b;
960 {
Mike Klein5591fdf2019-07-30 09:44:30 -0500961 skvm::Arg buf = b.varying<int16_t>(),
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500962 uniforms = b.uniform();
963
964 skvm::I32 x = b.load16(buf);
965
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600966 const size_t kPtr = sizeof(const int*);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500967
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600968 x = b.add(x, b.uniform32(uniforms, kPtr+0));
969 x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
970 x = b.sub(x, b.uniform16(uniforms, kPtr+6));
971
972 skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500973 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
974 x = b.select(b.gt(x, limit ), limit , x);
975
Mike Klein6dbd7ff2020-01-06 11:50:37 -0600976 x = b.gather8(uniforms,0, x);
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500977
978 b.store16(buf, x);
979 }
980
981 if ((false)) {
982 SkDynamicMemoryWStream buf;
983 dump(b, &buf);
984 sk_sp<SkData> blob = buf.detachAsData();
985 SkDebugf("%.*s\n", blob->size(), blob->data());
986 }
987
Mike Klein22c007d2020-02-28 11:38:58 -0600988#if defined(SKVM_LLVM)
989 test_jit_and_interpreter
990#else
991 test_interpreter_only
992#endif
993 (r, b.done(), [&](const skvm::Program& program) {
Mike Klein8ac9f4e2019-07-25 14:32:19 -0500994 const int N = 31;
995 int16_t buf[N];
996 for (int i = 0; i < N; i++) {
997 buf[i] = i;
998 }
999
1000 const int M = 16;
1001 uint8_t img[M];
1002 for (int i = 0; i < M; i++) {
1003 img[i] = i*i;
1004 }
1005
1006 struct {
Mike Klein6dbd7ff2020-01-06 11:50:37 -06001007 const uint8_t* img;
Mike Klein8ac9f4e2019-07-25 14:32:19 -05001008 int add = 5;
1009 uint8_t mul = 3;
1010 uint16_t sub = 18;
1011 int limit = M-1;
Mike Klein6dbd7ff2020-01-06 11:50:37 -06001012 } uniforms{img};
Mike Klein8ac9f4e2019-07-25 14:32:19 -05001013
Mike Klein6dbd7ff2020-01-06 11:50:37 -06001014 program.eval(N, buf, &uniforms);
Mike Klein8ac9f4e2019-07-25 14:32:19 -05001015
1016 for (int i = 0; i < N; i++) {
1017 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
1018 int x = 3*(i-1);
1019
1020 // Then that's pinned to the limits of img.
1021 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly...
1022 if (i > 5) { x = 15; } // ...and i == 6 hits x == 15 exactly
1023 REPORTER_ASSERT(r, buf[i] == img[x]);
1024 }
1025 });
1026}
1027
Mike Klein5a8404c2020-02-28 14:24:56 -06001028DEF_TEST(SkVM_sqrt, r) {
1029 skvm::Builder b;
1030 auto buf = b.varying<int>();
Mike Reedf5ff4c22020-03-23 14:57:53 -04001031 b.storeF(buf, b.sqrt(b.loadF(buf)));
Mike Klein5a8404c2020-02-28 14:24:56 -06001032
1033#if defined(SKVM_LLVM) || defined(SK_CPU_X86)
1034 test_jit_and_interpreter
1035#else
1036 test_interpreter_only
1037#endif
1038 (r, b.done(), [&](const skvm::Program& program) {
1039 constexpr int K = 17;
1040 float buf[K];
1041 for (int i = 0; i < K; i++) {
1042 buf[i] = (float)(i*i);
1043 }
1044
1045 // x^2 -> x
1046 program.eval(K, buf);
1047
1048 for (int i = 0; i < K; i++) {
1049 REPORTER_ASSERT(r, buf[i] == (float)i);
1050 }
1051 });
1052}
1053
Mike Klein3f7c8652019-11-07 10:33:56 -06001054DEF_TEST(SkVM_MSAN, r) {
1055 // This little memset32() program should be able to JIT, but if we run that
1056 // JIT code in an MSAN build, it won't see the writes initialize buf. So
1057 // this tests that we're using the interpreter instead.
1058 skvm::Builder b;
1059 b.store32(b.varying<int>(), b.splat(42));
1060
1061 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
1062 constexpr int K = 17;
1063 int buf[K]; // Intentionally uninitialized.
1064 program.eval(K, buf);
1065 sk_msan_assert_initialized(buf, buf+K);
1066 for (int x : buf) {
1067 REPORTER_ASSERT(r, x == 42);
1068 }
1069 });
1070}
1071
Mike Klein13601172019-11-08 15:01:02 -06001072DEF_TEST(SkVM_assert, r) {
1073 skvm::Builder b;
1074 b.assert_true(b.lt(b.load32(b.varying<int>()),
1075 b.splat(42)));
1076
1077 test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
Mike Klein749eef62019-11-11 09:47:44 -06001078 int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
Mike Klein13601172019-11-08 15:01:02 -06001079 program.eval(SK_ARRAY_COUNT(buf), buf);
1080 });
1081}
1082
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001083DEF_TEST(SkVM_premul, reporter) {
1084 // Test that premul is short-circuited when alpha is known opaque.
1085 {
1086 skvm::Builder p;
1087 auto rptr = p.varying<int>(),
1088 aptr = p.varying<int>();
1089
Mike Reedf5ff4c22020-03-23 14:57:53 -04001090 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001091 g = p.splat(0.0f),
1092 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001093 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001094
1095 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001096 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001097
1098 // load red, load alpha, red *= alpha, store red
1099 REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
1100 }
1101
1102 {
1103 skvm::Builder p;
1104 auto rptr = p.varying<int>();
1105
Mike Reedf5ff4c22020-03-23 14:57:53 -04001106 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001107 g = p.splat(0.0f),
1108 b = p.splat(0.0f),
1109 a = p.splat(1.0f);
1110
1111 p.premul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001112 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001113
1114 // load red, store red
1115 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1116 }
1117
1118 // Same deal for unpremul.
1119 {
1120 skvm::Builder p;
1121 auto rptr = p.varying<int>(),
1122 aptr = p.varying<int>();
1123
Mike Reedf5ff4c22020-03-23 14:57:53 -04001124 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001125 g = p.splat(0.0f),
1126 b = p.splat(0.0f),
Mike Reedf5ff4c22020-03-23 14:57:53 -04001127 a = p.loadF(aptr);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001128
1129 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001130 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001131
1132 // load red, load alpha, a bunch of unpremul instructions, store red
1133 REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
1134 }
1135
1136 {
1137 skvm::Builder p;
1138 auto rptr = p.varying<int>();
1139
Mike Reedf5ff4c22020-03-23 14:57:53 -04001140 skvm::F32 r = p.loadF(rptr),
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001141 g = p.splat(0.0f),
1142 b = p.splat(0.0f),
1143 a = p.splat(1.0f);
1144
1145 p.unpremul(&r, &g, &b, a);
Mike Reedf5ff4c22020-03-23 14:57:53 -04001146 p.storeF(rptr, r);
Mike Kleinbc1ce2c2020-02-03 12:17:13 -06001147
1148 // load red, store red
1149 REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
1150 }
1151}
Mike Klein05642042019-06-18 12:16:06 -05001152
Mike Klein05642042019-06-18 12:16:06 -05001153template <typename Fn>
1154static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
Mike Klein88c0a902019-06-24 15:34:02 -04001155 uint8_t buf[4096];
1156 skvm::Assembler a{buf};
Mike Klein05642042019-06-18 12:16:06 -05001157 fn(a);
1158
1159 REPORTER_ASSERT(r, a.size() == expected.size());
1160
Mike Klein88c0a902019-06-24 15:34:02 -04001161 auto got = (const uint8_t*)buf,
Mike Klein05642042019-06-18 12:16:06 -05001162 want = expected.begin();
1163 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
Mike Klein61703a62019-06-18 15:01:12 -05001164 REPORTER_ASSERT(r, got[i] == want[i],
1165 "byte %d was %02x, want %02x", i, got[i], want[i]);
Mike Klein05642042019-06-18 12:16:06 -05001166 }
1167}
1168
1169DEF_TEST(SkVM_Assembler, r) {
Mike Klein397fc882019-06-20 11:37:10 -05001170 // Easiest way to generate test cases is
1171 //
1172 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1173 //
1174 // The -x86-asm-syntax=intel bit is optional, controlling the
1175 // input syntax only; the output will always be AT&T op x,y,dst style.
1176 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1177 // that a bit easier to use here, despite maybe favoring AT&T overall.
1178
1179 using A = skvm::Assembler;
Mike Klein05642042019-06-18 12:16:06 -05001180 // Our exit strategy from AVX code.
Mike Klein397fc882019-06-20 11:37:10 -05001181 test_asm(r, [&](A& a) {
Mike Kleinee5864a2019-11-11 09:16:44 -06001182 a.int3();
Mike Klein05642042019-06-18 12:16:06 -05001183 a.vzeroupper();
1184 a.ret();
1185 },{
Mike Kleinee5864a2019-11-11 09:16:44 -06001186 0xcc,
Mike Klein05642042019-06-18 12:16:06 -05001187 0xc5, 0xf8, 0x77,
1188 0xc3,
1189 });
1190
Mike Klein237dbb42019-07-19 09:44:47 -05001191 // Align should pad with zero
Mike Klein397fc882019-06-20 11:37:10 -05001192 test_asm(r, [&](A& a) {
Mike Klein05642042019-06-18 12:16:06 -05001193 a.ret();
1194 a.align(4);
1195 },{
1196 0xc3,
Mike Klein237dbb42019-07-19 09:44:47 -05001197 0x00, 0x00, 0x00,
Mike Klein05642042019-06-18 12:16:06 -05001198 });
Mike Klein61703a62019-06-18 15:01:12 -05001199
Mike Klein397fc882019-06-20 11:37:10 -05001200 test_asm(r, [&](A& a) {
1201 a.add(A::rax, 8); // Always good to test rax.
1202 a.sub(A::rax, 32);
Mike Kleind3e75a72019-06-18 15:26:08 -05001203
Mike Klein397fc882019-06-20 11:37:10 -05001204 a.add(A::rdi, 12); // Last 0x48 REX
1205 a.sub(A::rdi, 8);
Mike Kleind3e75a72019-06-18 15:26:08 -05001206
Mike Klein86a645c2019-07-12 12:29:39 -05001207 a.add(A::r8 , 7); // First 0x49 REX
Mike Klein397fc882019-06-20 11:37:10 -05001208 a.sub(A::r8 , 4);
Mike Kleind3e75a72019-06-18 15:26:08 -05001209
Mike Klein397fc882019-06-20 11:37:10 -05001210 a.add(A::rsi, 128); // Requires 4 byte immediate.
1211 a.sub(A::r8 , 1000000);
Mike Klein61703a62019-06-18 15:01:12 -05001212 },{
Mike Kleind3e75a72019-06-18 15:26:08 -05001213 0x48, 0x83, 0b11'000'000, 0x08,
Mike Klein61703a62019-06-18 15:01:12 -05001214 0x48, 0x83, 0b11'101'000, 0x20,
Mike Kleind3e75a72019-06-18 15:26:08 -05001215
1216 0x48, 0x83, 0b11'000'111, 0x0c,
Mike Klein61703a62019-06-18 15:01:12 -05001217 0x48, 0x83, 0b11'101'111, 0x08,
Mike Kleind3e75a72019-06-18 15:26:08 -05001218
Mike Klein86a645c2019-07-12 12:29:39 -05001219 0x49, 0x83, 0b11'000'000, 0x07,
1220 0x49, 0x83, 0b11'101'000, 0x04,
Mike Kleind3e75a72019-06-18 15:26:08 -05001221
1222 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
Mike Klein86a645c2019-07-12 12:29:39 -05001223 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
Mike Klein61703a62019-06-18 15:01:12 -05001224 });
Mike Klein397fc882019-06-20 11:37:10 -05001225
1226
1227 test_asm(r, [&](A& a) {
1228 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1229 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1230 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1231 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1232 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1233 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1234 },{
1235 /* VEX */ /*op*/ /*modRM*/
1236 0xc5, 0xf5, 0xfe, 0xc2,
1237 0xc5, 0x75, 0xfe, 0xc2,
1238 0xc5, 0xbd, 0xfe, 0xc2,
1239 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1240 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1241 0xc5, 0xf5, 0xfa, 0xc2,
1242 });
Mike Kleinff0ae812019-06-20 15:03:44 -05001243
1244 test_asm(r, [&](A& a) {
Mike Klein714f8cc2019-11-06 12:54:46 -06001245 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1246 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1247 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1248 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1249 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1250 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
Mike Kleinb9944122019-08-02 12:22:39 -05001251 },{
1252 0xc5,0xf5,0x76,0xc2,
1253 0xc5,0xf5,0x66,0xc2,
Mike Klein714f8cc2019-11-06 12:54:46 -06001254 0xc5,0xf4,0xc2,0xc2,0x00,
1255 0xc5,0xf4,0xc2,0xc2,0x01,
1256 0xc5,0xf4,0xc2,0xc2,0x02,
1257 0xc5,0xf4,0xc2,0xc2,0x04,
Mike Kleinb9944122019-08-02 12:22:39 -05001258 });
1259
1260 test_asm(r, [&](A& a) {
Mike Kleina53e47f2019-11-08 13:38:47 -06001261 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1262 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1263 },{
1264 0xc5,0xf4,0x5d,0xc2,
1265 0xc5,0xf4,0x5f,0xc2,
1266 });
1267
1268 test_asm(r, [&](A& a) {
Mike Kleinb9944122019-08-02 12:22:39 -05001269 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1270 },{
1271 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1272 });
1273
1274 test_asm(r, [&](A& a) {
Mike Kleinff0ae812019-06-20 15:03:44 -05001275 a.vpsrld(A::ymm15, A::ymm2, 8);
1276 a.vpsrld(A::ymm0 , A::ymm8, 5);
1277 },{
1278 0xc5, 0x85, 0x72,0xd2, 0x08,
1279 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1280 });
1281
1282 test_asm(r, [&](A& a) {
1283 a.vpermq(A::ymm1, A::ymm2, 5);
1284 },{
1285 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1286 });
Mike Kleine5053412019-06-21 12:37:22 -05001287
1288 test_asm(r, [&](A& a) {
Mike Kleinf22faaf2020-01-09 07:27:39 -06001289 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1290 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1291 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1292 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1293 },{
1294 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1295 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1296 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1297 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1298 });
1299
1300 test_asm(r, [&](A& a) {
Mike Kleine5053412019-06-21 12:37:22 -05001301 A::Label l = a.here();
1302 a.byte(1);
1303 a.byte(2);
1304 a.byte(3);
1305 a.byte(4);
Mike Klein04db9c22019-06-21 14:19:21 -05001306
Mike Klein65c10b52019-07-12 09:22:21 -05001307 a.vbroadcastss(A::ymm0 , &l);
1308 a.vbroadcastss(A::ymm1 , &l);
1309 a.vbroadcastss(A::ymm8 , &l);
1310 a.vbroadcastss(A::ymm15, &l);
Mike Klein04db9c22019-06-21 14:19:21 -05001311
Mike Klein65c10b52019-07-12 09:22:21 -05001312 a.vpshufb(A::ymm4, A::ymm3, &l);
Mike Klein7a13b462019-11-05 07:46:02 -06001313 a.vpaddd (A::ymm4, A::ymm3, &l);
1314 a.vpsubd (A::ymm4, A::ymm3, &l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001315
1316 a.vptest(A::ymm4, &l);
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001317
1318 a.vmulps (A::ymm4, A::ymm3, &l);
Mike Kleine5053412019-06-21 12:37:22 -05001319 },{
1320 0x01, 0x02, 0x03, 0x4,
Mike Klein04db9c22019-06-21 14:19:21 -05001321
Mike Kleine5053412019-06-21 12:37:22 -05001322 /* VEX */ /*op*/ /* ModRM */ /* offset */
1323 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1324 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1325 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1326 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
Mike Klein04db9c22019-06-21 14:19:21 -05001327
1328 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
Mike Klein7a13b462019-11-05 07:46:02 -06001329
1330 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1331 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
Mike Kleinee5864a2019-11-11 09:16:44 -06001332
Mike Klein8c1e0ef2019-11-12 09:07:23 -06001333 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1334
        0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff,   // 0xffffffae == -82
Mike Kleine5053412019-06-21 12:37:22 -05001336 });
Mike Klein060eaaa2019-06-21 14:42:09 -05001337
1338 test_asm(r, [&](A& a) {
Mike Klein788967e2019-08-02 10:15:51 -05001339 a.vbroadcastss(A::ymm0, A::rdi, 0);
1340 a.vbroadcastss(A::ymm13, A::r14, 7);
1341 a.vbroadcastss(A::ymm8, A::rdx, -12);
1342 a.vbroadcastss(A::ymm8, A::rdx, 400);
Mike Klein94d054b2019-08-02 10:54:23 -05001343
1344 a.vbroadcastss(A::ymm8, A::xmm0);
1345 a.vbroadcastss(A::ymm0, A::xmm13);
Mike Klein788967e2019-08-02 10:15:51 -05001346 },{
1347 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1348 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1349 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1350 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1351 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
Mike Klein94d054b2019-08-02 10:54:23 -05001352
1353 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1354 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
Mike Klein788967e2019-08-02 10:15:51 -05001355 });
1356
1357 test_asm(r, [&](A& a) {
Mike Klein060eaaa2019-06-21 14:42:09 -05001358 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001359 a.jne(&l);
1360 a.jne(&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001361 a.je (&l);
1362 a.jmp(&l);
1363 a.jl (&l);
Mike Kleinee5864a2019-11-11 09:16:44 -06001364 a.jc (&l);
Mike Klein35b97c32019-07-12 12:32:45 -05001365
1366 a.cmp(A::rdx, 0);
1367 a.cmp(A::rax, 12);
1368 a.cmp(A::r14, 2000000000);
Mike Klein060eaaa2019-06-21 14:42:09 -05001369 },{
Mike Klein35b97c32019-07-12 12:32:45 -05001370 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1371 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1372 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1373 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1374 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
Mike Kleinee5864a2019-11-11 09:16:44 -06001375 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
Mike Klein35b97c32019-07-12 12:32:45 -05001376
1377 0x48,0x83,0xfa,0x00,
1378 0x48,0x83,0xf8,0x0c,
1379 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
Mike Klein060eaaa2019-06-21 14:42:09 -05001380 });
Mike Klein120d9e82019-06-21 15:52:55 -05001381
1382 test_asm(r, [&](A& a) {
1383 a.vmovups(A::ymm5, A::rsi);
1384 a.vmovups(A::rsi, A::ymm5);
Mike Kleinae51aa32019-06-21 16:06:03 -05001385
Mike Klein95529e82019-08-02 11:43:43 -05001386 a.vmovups(A::rsi, A::xmm5);
1387
Mike Klein52010b72019-08-02 11:18:00 -05001388 a.vpmovzxwd(A::ymm4, A::rsi);
Mike Kleinae51aa32019-06-21 16:06:03 -05001389 a.vpmovzxbd(A::ymm4, A::rsi);
Mike Kleinf3881b22019-06-21 16:20:24 -05001390
1391 a.vmovq(A::rdx, A::xmm15);
Mike Klein120d9e82019-06-21 15:52:55 -05001392 },{
Mike Kleinae51aa32019-06-21 16:06:03 -05001393 /* VEX */ /*Op*/ /* ModRM */
1394 0xc5, 0xfc, 0x10, 0b00'101'110,
1395 0xc5, 0xfc, 0x11, 0b00'101'110,
1396
Mike Klein95529e82019-08-02 11:43:43 -05001397 0xc5, 0xf8, 0x11, 0b00'101'110,
1398
Mike Klein52010b72019-08-02 11:18:00 -05001399 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
Mike Kleinae51aa32019-06-21 16:06:03 -05001400 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
Mike Kleinf3881b22019-06-21 16:20:24 -05001401
1402 0xc5, 0x79, 0xd6, 0b00'111'010,
Mike Klein120d9e82019-06-21 15:52:55 -05001403 });
Mike Klein2b7b2a22019-06-23 20:35:28 -04001404
1405 test_asm(r, [&](A& a) {
Mike Klein94d054b2019-08-02 10:54:23 -05001406 a.movzbl(A::rax, A::rsi, 0); // Low registers for src and dst.
1407 a.movzbl(A::rax, A::r8, 0); // High src register.
1408 a.movzbl(A::r8 , A::rsi, 0); // High dst register.
1409 a.movzbl(A::r8, A::rsi, 12);
1410 a.movzbl(A::r8, A::rsi, 400);
Mike Klein35b97c32019-07-12 12:32:45 -05001411
1412 a.vmovd(A::rax, A::xmm0);
1413 a.vmovd(A::rax, A::xmm8);
1414 a.vmovd(A::r8, A::xmm0);
1415
1416 a.vmovd(A::xmm0, A::rax);
1417 a.vmovd(A::xmm8, A::rax);
1418 a.vmovd(A::xmm0, A::r8);
1419
Mike Klein93d3fab2020-01-14 10:46:44 -06001420 a.vmovd(A::xmm0 , A::FOUR, A::rcx, A::rax);
1421 a.vmovd(A::xmm15, A::TWO, A::r8, A::rax);
1422 a.vmovd(A::xmm0 , A::ONE, A::rcx, A::r8);
1423
Mike Klein35b97c32019-07-12 12:32:45 -05001424 a.vmovd_direct(A::rax, A::xmm0);
1425 a.vmovd_direct(A::rax, A::xmm8);
1426 a.vmovd_direct(A::r8, A::xmm0);
1427
1428 a.vmovd_direct(A::xmm0, A::rax);
1429 a.vmovd_direct(A::xmm8, A::rax);
1430 a.vmovd_direct(A::xmm0, A::r8);
1431
1432 a.movb(A::rdx, A::rax);
1433 a.movb(A::rdx, A::r8);
1434 a.movb(A::r8 , A::rax);
1435 },{
1436 0x0f,0xb6,0x06,
1437 0x41,0x0f,0xb6,0x00,
1438 0x44,0x0f,0xb6,0x06,
Mike Klein94d054b2019-08-02 10:54:23 -05001439 0x44,0x0f,0xb6,0x46, 12,
1440 0x44,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
Mike Klein35b97c32019-07-12 12:32:45 -05001441
1442 0xc5,0xf9,0x7e,0x00,
1443 0xc5,0x79,0x7e,0x00,
1444 0xc4,0xc1,0x79,0x7e,0x00,
1445
1446 0xc5,0xf9,0x6e,0x00,
1447 0xc5,0x79,0x6e,0x00,
1448 0xc4,0xc1,0x79,0x6e,0x00,
1449
Mike Klein93d3fab2020-01-14 10:46:44 -06001450 0xc5,0xf9,0x6e,0x04,0x88,
1451 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1452 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1453
Mike Klein35b97c32019-07-12 12:32:45 -05001454 0xc5,0xf9,0x7e,0xc0,
1455 0xc5,0x79,0x7e,0xc0,
1456 0xc4,0xc1,0x79,0x7e,0xc0,
1457
1458 0xc5,0xf9,0x6e,0xc0,
1459 0xc5,0x79,0x6e,0xc0,
1460 0xc4,0xc1,0x79,0x6e,0xc0,
1461
1462 0x88, 0x02,
1463 0x44, 0x88, 0x02,
1464 0x41, 0x88, 0x00,
1465 });
1466
1467 test_asm(r, [&](A& a) {
Mike Klein52010b72019-08-02 11:18:00 -05001468 a.vpinsrw(A::xmm1, A::xmm8, A::rsi, 4);
1469 a.vpinsrw(A::xmm8, A::xmm1, A::r8, 12);
1470
Mike Klein35b97c32019-07-12 12:32:45 -05001471 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
1472 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
1473
Mike Klein95529e82019-08-02 11:43:43 -05001474 a.vpextrw(A::rsi, A::xmm8, 7);
1475 a.vpextrw(A::r8, A::xmm1, 15);
1476
Mike Klein35b97c32019-07-12 12:32:45 -05001477 a.vpextrb(A::rsi, A::xmm8, 7);
1478 a.vpextrb(A::r8, A::xmm1, 15);
1479 },{
Mike Klein52010b72019-08-02 11:18:00 -05001480 0xc5,0xb9, 0xc4, 0x0e, 4,
1481 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1482
Mike Klein35b97c32019-07-12 12:32:45 -05001483 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1484 0xc4,0x43,0x71, 0x20, 0x00, 12,
1485
Mike Klein95529e82019-08-02 11:43:43 -05001486 0xc4,0x63,0x79, 0x15, 0x06, 7,
1487 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1488
Mike Klein35b97c32019-07-12 12:32:45 -05001489 0xc4,0x63,0x79, 0x14, 0x06, 7,
1490 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1491 });
1492
1493 test_asm(r, [&](A& a) {
Mike Klein2b7b2a22019-06-23 20:35:28 -04001494 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1495 },{
1496 0xc5, 0x9d, 0xdf, 0xda,
1497 });
Mike Klein9f4df802019-06-24 18:47:16 -04001498
Mike Kleind4546d62019-07-30 12:15:40 -05001499 test_asm(r, [&](A& a) {
1500 a.vmovdqa (A::ymm3, A::ymm2);
1501 a.vcvttps2dq(A::ymm3, A::ymm2);
1502 a.vcvtdq2ps (A::ymm3, A::ymm2);
Mike Klein6e4aad92019-11-08 14:13:15 -06001503 a.vcvtps2dq (A::ymm3, A::ymm2);
Mike Kleinba9da462020-01-28 14:25:09 -06001504 a.vsqrtps (A::ymm3, A::ymm2);
Mike Kleind4546d62019-07-30 12:15:40 -05001505 },{
1506 0xc5,0xfd,0x6f,0xda,
1507 0xc5,0xfe,0x5b,0xda,
1508 0xc5,0xfc,0x5b,0xda,
Mike Klein6e4aad92019-11-08 14:13:15 -06001509 0xc5,0xfd,0x5b,0xda,
Mike Kleinba9da462020-01-28 14:25:09 -06001510 0xc5,0xfc,0x51,0xda,
Mike Kleind4546d62019-07-30 12:15:40 -05001511 });
1512
Mike Kleinbeaa1082020-01-13 14:04:18 -06001513 test_asm(r, [&](A& a) {
1514 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1515 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1516 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1517 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1518 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1519 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1520 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1521 },{
1522 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1523 0xc4,0xe2,0x75,0x92,0x04,0x10,
1524 0xc4,0x62,0x75,0x92,0x14,0x10,
1525 0xc4,0xa2,0x75,0x92,0x04,0x20,
1526 0xc4,0xc2,0x75,0x92,0x04,0x11,
1527 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1528 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1529 });
1530
Mike Kleinc322f632020-01-13 16:18:58 -06001531 test_asm(r, [&](A& a) {
1532 a.movq(A::rax, A::rdi, 0);
1533 a.movq(A::rax, A::rdi, 1);
1534 a.movq(A::rax, A::rdi, 512);
1535 a.movq(A::r15, A::r13, 42);
1536 a.movq(A::rax, A::r13, 42);
1537 a.movq(A::r15, A::rax, 42);
1538 },{
1539 0x48, 0x8b, 0x07,
1540 0x48, 0x8b, 0x47, 0x01,
1541 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1542 0x4d, 0x8b, 0x7d, 0x2a,
1543 0x49, 0x8b, 0x45, 0x2a,
1544 0x4c, 0x8b, 0x78, 0x2a,
1545 });
1546
Mike Klein9f4df802019-06-24 18:47:16 -04001547 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1548
1549 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001550 a.and16b(A::v4, A::v3, A::v1);
1551 a.orr16b(A::v4, A::v3, A::v1);
1552 a.eor16b(A::v4, A::v3, A::v1);
1553 a.bic16b(A::v4, A::v3, A::v1);
Mike Klein97afd2e2019-10-16 14:11:27 -05001554 a.bsl16b(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001555 a.not16b(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001556
1557 a.add4s(A::v4, A::v3, A::v1);
1558 a.sub4s(A::v4, A::v3, A::v1);
1559 a.mul4s(A::v4, A::v3, A::v1);
1560
Mike Klein97afd2e2019-10-16 14:11:27 -05001561 a.cmeq4s(A::v4, A::v3, A::v1);
1562 a.cmgt4s(A::v4, A::v3, A::v1);
1563
Mike Klein65809142019-06-25 09:44:02 -04001564 a.sub8h(A::v4, A::v3, A::v1);
1565 a.mul8h(A::v4, A::v3, A::v1);
1566
Mike Klein9f4df802019-06-24 18:47:16 -04001567 a.fadd4s(A::v4, A::v3, A::v1);
1568 a.fsub4s(A::v4, A::v3, A::v1);
1569 a.fmul4s(A::v4, A::v3, A::v1);
1570 a.fdiv4s(A::v4, A::v3, A::v1);
Mike Kleina53e47f2019-11-08 13:38:47 -06001571 a.fmin4s(A::v4, A::v3, A::v1);
1572 a.fmax4s(A::v4, A::v3, A::v1);
Mike Klein7c0332c2020-03-05 14:18:04 -06001573 a.fneg4s(A::v4, A::v3);
Mike Klein9f4df802019-06-24 18:47:16 -04001574
Mike Klein65809142019-06-25 09:44:02 -04001575 a.fmla4s(A::v4, A::v3, A::v1);
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001576 a.fmls4s(A::v4, A::v3, A::v1);
Mike Klein81a8d282019-11-06 15:11:01 -06001577
1578 a.fcmeq4s(A::v4, A::v3, A::v1);
1579 a.fcmgt4s(A::v4, A::v3, A::v1);
1580 a.fcmge4s(A::v4, A::v3, A::v1);
Mike Klein9f4df802019-06-24 18:47:16 -04001581 },{
Mike Klein65809142019-06-25 09:44:02 -04001582 0x64,0x1c,0x21,0x4e,
1583 0x64,0x1c,0xa1,0x4e,
1584 0x64,0x1c,0x21,0x6e,
1585 0x64,0x1c,0x61,0x4e,
Mike Klein97afd2e2019-10-16 14:11:27 -05001586 0x64,0x1c,0x61,0x6e,
Mike Klein81a8d282019-11-06 15:11:01 -06001587 0x64,0x58,0x20,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001588
1589 0x64,0x84,0xa1,0x4e,
1590 0x64,0x84,0xa1,0x6e,
1591 0x64,0x9c,0xa1,0x4e,
1592
Mike Klein97afd2e2019-10-16 14:11:27 -05001593 0x64,0x8c,0xa1,0x6e,
1594 0x64,0x34,0xa1,0x4e,
1595
Mike Klein65809142019-06-25 09:44:02 -04001596 0x64,0x84,0x61,0x6e,
1597 0x64,0x9c,0x61,0x4e,
1598
Mike Klein9f4df802019-06-24 18:47:16 -04001599 0x64,0xd4,0x21,0x4e,
1600 0x64,0xd4,0xa1,0x4e,
1601 0x64,0xdc,0x21,0x6e,
1602 0x64,0xfc,0x21,0x6e,
Mike Kleina53e47f2019-11-08 13:38:47 -06001603 0x64,0xf4,0xa1,0x4e,
1604 0x64,0xf4,0x21,0x4e,
Mike Klein7c0332c2020-03-05 14:18:04 -06001605 0x64,0xf8,0xa0,0x6e,
Mike Klein9f4df802019-06-24 18:47:16 -04001606
Mike Klein65809142019-06-25 09:44:02 -04001607 0x64,0xcc,0x21,0x4e,
Jarrett Phillipsf9734c32020-02-13 15:18:37 -06001608 0x64,0xcc,0xa1,0x4e,
Mike Klein81a8d282019-11-06 15:11:01 -06001609
1610 0x64,0xe4,0x21,0x4e,
1611 0x64,0xe4,0xa1,0x6e,
1612 0x64,0xe4,0x21,0x6e,
Mike Klein65809142019-06-25 09:44:02 -04001613 });
1614
1615 test_asm(r, [&](A& a) {
1616 a.shl4s(A::v4, A::v3, 0);
1617 a.shl4s(A::v4, A::v3, 1);
1618 a.shl4s(A::v4, A::v3, 8);
1619 a.shl4s(A::v4, A::v3, 16);
1620 a.shl4s(A::v4, A::v3, 31);
1621
1622 a.sshr4s(A::v4, A::v3, 1);
1623 a.sshr4s(A::v4, A::v3, 8);
1624 a.sshr4s(A::v4, A::v3, 31);
1625
1626 a.ushr4s(A::v4, A::v3, 1);
1627 a.ushr4s(A::v4, A::v3, 8);
1628 a.ushr4s(A::v4, A::v3, 31);
1629
1630 a.ushr8h(A::v4, A::v3, 1);
1631 a.ushr8h(A::v4, A::v3, 8);
1632 a.ushr8h(A::v4, A::v3, 15);
1633 },{
1634 0x64,0x54,0x20,0x4f,
1635 0x64,0x54,0x21,0x4f,
1636 0x64,0x54,0x28,0x4f,
1637 0x64,0x54,0x30,0x4f,
1638 0x64,0x54,0x3f,0x4f,
1639
1640 0x64,0x04,0x3f,0x4f,
1641 0x64,0x04,0x38,0x4f,
1642 0x64,0x04,0x21,0x4f,
1643
1644 0x64,0x04,0x3f,0x6f,
1645 0x64,0x04,0x38,0x6f,
1646 0x64,0x04,0x21,0x6f,
1647
1648 0x64,0x04,0x1f,0x6f,
1649 0x64,0x04,0x18,0x6f,
1650 0x64,0x04,0x11,0x6f,
1651 });
1652
1653 test_asm(r, [&](A& a) {
Mike Klein13267492019-07-19 12:21:19 -05001654 a.sli4s(A::v4, A::v3, 0);
1655 a.sli4s(A::v4, A::v3, 1);
1656 a.sli4s(A::v4, A::v3, 8);
1657 a.sli4s(A::v4, A::v3, 16);
1658 a.sli4s(A::v4, A::v3, 31);
1659 },{
1660 0x64,0x54,0x20,0x6f,
1661 0x64,0x54,0x21,0x6f,
1662 0x64,0x54,0x28,0x6f,
1663 0x64,0x54,0x30,0x6f,
1664 0x64,0x54,0x3f,0x6f,
1665 });
1666
1667 test_asm(r, [&](A& a) {
Mike Klein65809142019-06-25 09:44:02 -04001668 a.scvtf4s (A::v4, A::v3);
1669 a.fcvtzs4s(A::v4, A::v3);
Mike Klein6e4aad92019-11-08 14:13:15 -06001670 a.fcvtns4s(A::v4, A::v3);
Mike Klein65809142019-06-25 09:44:02 -04001671 },{
1672 0x64,0xd8,0x21,0x4e,
1673 0x64,0xb8,0xa1,0x4e,
Mike Klein6e4aad92019-11-08 14:13:15 -06001674 0x64,0xa8,0x21,0x4e,
Mike Klein9f4df802019-06-24 18:47:16 -04001675 });
Mike Klein15a368d2019-06-26 10:21:12 -04001676
1677 test_asm(r, [&](A& a) {
Mike Klein37be7712019-11-13 13:19:01 -06001678 a.brk(0);
1679 a.brk(65535);
1680
Mike Klein15a368d2019-06-26 10:21:12 -04001681 a.ret(A::x30); // Conventional ret using link register.
1682 a.ret(A::x13); // Can really return using any register if we like.
1683
1684 a.add(A::x2, A::x2, 4);
1685 a.add(A::x3, A::x2, 32);
1686
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001687 a.sub(A::x2, A::x2, 4);
1688 a.sub(A::x3, A::x2, 32);
1689
Mike Klein15a368d2019-06-26 10:21:12 -04001690 a.subs(A::x2, A::x2, 4);
1691 a.subs(A::x3, A::x2, 32);
1692
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001693 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1694 a.cmp(A::x2, 4);
1695
Mike Klein15a368d2019-06-26 10:21:12 -04001696 A::Label l = a.here();
Mike Klein65c10b52019-07-12 09:22:21 -05001697 a.bne(&l);
1698 a.bne(&l);
1699 a.blt(&l);
1700 a.b(&l);
1701 a.cbnz(A::x2, &l);
Mike Kleince7b88c2019-07-11 14:06:40 -05001702 a.cbz(A::x2, &l);
Mike Klein15a368d2019-06-26 10:21:12 -04001703 },{
Mike Klein37be7712019-11-13 13:19:01 -06001704 0x00,0x00,0x20,0xd4,
1705 0xe0,0xff,0x3f,0xd4,
1706
Mike Klein15a368d2019-06-26 10:21:12 -04001707 0xc0,0x03,0x5f,0xd6,
1708 0xa0,0x01,0x5f,0xd6,
1709
1710 0x42,0x10,0x00,0x91,
1711 0x43,0x80,0x00,0x91,
1712
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001713 0x42,0x10,0x00,0xd1,
1714 0x43,0x80,0x00,0xd1,
1715
Mike Klein15a368d2019-06-26 10:21:12 -04001716 0x42,0x10,0x00,0xf1,
1717 0x43,0x80,0x00,0xf1,
1718
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001719 0x5f,0x10,0x00,0xf1,
1720 0x5f,0x10,0x00,0xf1,
1721
1722 0x01,0x00,0x00,0x54, // b.ne #0
1723 0xe1,0xff,0xff,0x54, // b.ne #-4
1724 0xcb,0xff,0xff,0x54, // b.lt #-8
1725 0xae,0xff,0xff,0x54, // b.al #-12
1726 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1727 0x62,0xff,0xff,0xb4, // cbz x2, #-20
Mike Klein15a368d2019-06-26 10:21:12 -04001728 });
Mike Kleine51632e2019-06-26 14:47:43 -04001729
Mike Kleince7b88c2019-07-11 14:06:40 -05001730 // Can we cbz() to a not-yet-defined label?
1731 test_asm(r, [&](A& a) {
1732 A::Label l;
1733 a.cbz(A::x2, &l);
1734 a.add(A::x3, A::x2, 32);
1735 a.label(&l);
1736 a.ret(A::x30);
1737 },{
1738 0x42,0x00,0x00,0xb4, // cbz x2, #8
1739 0x43,0x80,0x00,0x91, // add x3, x2, #32
1740 0xc0,0x03,0x5f,0xd6, // ret
1741 });
1742
1743 // If we start a label as a backward label,
1744 // can we redefine it to be a future label?
1745 // (Not sure this is useful... just want to test it works.)
1746 test_asm(r, [&](A& a) {
1747 A::Label l1 = a.here();
1748 a.add(A::x3, A::x2, 32);
1749 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1750
1751 A::Label l2 = a.here(); // Start off the same...
1752 a.add(A::x3, A::x2, 32);
1753 a.cbz(A::x2, &l2); // Looks like this will go backward...
1754 a.add(A::x2, A::x2, 4);
1755 a.add(A::x3, A::x2, 32);
1756 a.label(&l2); // But no... actually forward! What a switcheroo!
1757 },{
1758 0x43,0x80,0x00,0x91, // add x3, x2, #32
1759 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1760
1761 0x43,0x80,0x00,0x91, // add x3, x2, #32
1762 0x62,0x00,0x00,0xb4, // cbz x2, #12
1763 0x42,0x10,0x00,0x91, // add x2, x2, #4
1764 0x43,0x80,0x00,0x91, // add x3, x2, #32
1765 });
1766
Mike Klein81d52672019-07-30 11:11:09 -05001767 // Loading from a label on ARM.
1768 test_asm(r, [&](A& a) {
1769 A::Label fore,aft;
1770 a.label(&fore);
1771 a.word(0x01234567);
1772 a.ldrq(A::v1, &fore);
1773 a.ldrq(A::v2, &aft);
1774 a.label(&aft);
1775 a.word(0x76543210);
1776 },{
1777 0x67,0x45,0x23,0x01,
1778 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1779 0x22,0x00,0x00,0x9c, // ldr q2, #4
1780 0x10,0x32,0x54,0x76,
1781 });
1782
Mike Kleine51632e2019-06-26 14:47:43 -04001783 test_asm(r, [&](A& a) {
1784 a.ldrq(A::v0, A::x8);
1785 a.strq(A::v0, A::x8);
1786 },{
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001787 0x00,0x01,0xc0,0x3d,
1788 0x00,0x01,0x80,0x3d,
Mike Kleine51632e2019-06-26 14:47:43 -04001789 });
Mike Klein1fa149a2019-07-01 11:18:08 -05001790
1791 test_asm(r, [&](A& a) {
1792 a.xtns2h(A::v0, A::v0);
1793 a.xtnh2b(A::v0, A::v0);
1794 a.strs (A::v0, A::x0);
1795
1796 a.ldrs (A::v0, A::x0);
1797 a.uxtlb2h(A::v0, A::v0);
1798 a.uxtlh2s(A::v0, A::v0);
Mike Klein37be7712019-11-13 13:19:01 -06001799
1800 a.uminv4s(A::v3, A::v4);
1801 a.fmovs (A::x3, A::v4); // fmov w3,s4
Mike Klein1fa149a2019-07-01 11:18:08 -05001802 },{
1803 0x00,0x28,0x61,0x0e,
1804 0x00,0x28,0x21,0x0e,
1805 0x00,0x00,0x00,0xbd,
1806
1807 0x00,0x00,0x40,0xbd,
1808 0x00,0xa4,0x08,0x2f,
1809 0x00,0xa4,0x10,0x2f,
Mike Klein37be7712019-11-13 13:19:01 -06001810
1811 0x83,0xa8,0xb1,0x6e,
1812 0x83,0x00,0x26,0x1e,
Mike Klein1fa149a2019-07-01 11:18:08 -05001813 });
Mike Klein4cfe3ed2019-07-11 11:25:37 -05001814
1815 test_asm(r, [&](A& a) {
1816 a.ldrb(A::v0, A::x8);
1817 a.strb(A::v0, A::x8);
1818 },{
1819 0x00,0x01,0x40,0x3d,
1820 0x00,0x01,0x00,0x3d,
1821 });
Mike Klein81d52672019-07-30 11:11:09 -05001822
1823 test_asm(r, [&](A& a) {
1824 a.tbl(A::v0, A::v1, A::v2);
1825 },{
1826 0x20,0x00,0x02,0x4e,
1827 });
Mike Klein05642042019-06-18 12:16:06 -05001828}
Mike Reedbcb46c02020-03-23 17:51:01 -04001829
1830DEF_TEST(SkVM_approx_math, r) {
1831 auto eval = [](int N, float values[], auto fn) {
1832 skvm::Builder b;
1833 skvm::Arg inout = b.varying<float>();
1834
1835 b.storeF(inout, fn(&b, b.loadF(inout)));
1836
1837 b.done().eval(N, values);
1838 };
1839
1840 auto compare = [r](int N, const float values[], const float expected[]) {
1841 for (int i = 0; i < N; ++i) {
1842 REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1843 }
1844 };
1845
1846 // log2
1847 {
1848 float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
1849 constexpr int N = SK_ARRAY_COUNT(values);
1850 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1851 return b->approx_log2(v);
1852 });
1853 const float expected[] = {-2, -1, 0, 1, 2, 3};
1854 compare(N, values, expected);
1855 }
1856
1857 // pow2
1858 {
1859 float values[] = {-2, -1, 0, 1, 2, 3};
1860 constexpr int N = SK_ARRAY_COUNT(values);
1861 eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
1862 return b->approx_pow2(v);
1863 });
1864 const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
1865 compare(N, values, expected);
1866 }
1867
1868 // powf -- x^0.5
1869 {
1870 float bases[] = {0, 1, 4, 9, 16};
1871 constexpr int N = SK_ARRAY_COUNT(bases);
1872 eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
1873 return b->approx_powf(base, b->splat(0.5f));
1874 });
1875 const float expected[] = {0, 1, 2, 3, 4};
1876 compare(N, bases, expected);
1877 }
1878 // powf -- 3^x
1879 {
1880 float exps[] = {-2, -1, 0, 1, 2};
1881 constexpr int N = SK_ARRAY_COUNT(exps);
1882 eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
1883 return b->approx_powf(b->splat(3.0f), exp);
1884 });
1885 const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
1886 compare(N, exps, expected);
1887 }
1888}