/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <vector>
18
19namespace SkSL {
20
21#if defined(SK_ENABLE_SKSL_INTERPRETER)
22
// Number of lanes in each interpreter vector; taken from ByteCode::kVecWidth so both agree.
constexpr int VecWidth = ByteCode::kVecWidth;
24
25struct Interpreter {
26
// Lane-parallel value types: each is a skvx vector of VecWidth elements of the named scalar type.
using F32 = skvx::Vec<VecWidth, float>;
using I32 = skvx::Vec<VecWidth, int32_t>;
using U32 = skvx::Vec<VecWidth, uint32_t>;
30
// Bytecode operand readers. Each one expects a `const uint8_t* ip` to be in scope at the point of
// use, advances it past the value, and evaluates to the value that was read.
// READ8 dereferences ip directly; the wider readers advance ip first and then load from the bytes
// just skipped through sk_unaligned_load (which, as its name indicates, does not need ip to be
// aligned for the type being read).
#define READ8() (*(ip++))
#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
// Reads one opcode; its width is sizeof(ByteCodeInstruction).
#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
        sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
Brian Osmanb08cc022020-04-02 11:38:40 -040036
// Expands to four `case` labels for use inside a switch over ByteCodeInstruction: `op`, `op2`,
// `op3` and `op4`. Each prints `text` followed by the width suffix ("" for the scalar form,
// "2"/"3"/"4" otherwise) and breaks. `text` must be a string literal, since the suffix is appended
// by literal concatenation.
#define VECTOR_DISASSEMBLE(op, text)                               \
    case ByteCodeInstruction::op: printf(text); break;             \
    case ByteCodeInstruction::op##2: printf(text "2"); break;      \
    case ByteCodeInstruction::op##3: printf(text "3"); break;      \
    case ByteCodeInstruction::op##4: printf(text "4"); break;
42
// Same as VECTOR_DISASSEMBLE, plus a fifth `case` for `opN`. The N form carries a one-byte
// operand in the instruction stream, which is consumed with READ8() (so `ip` must be in scope)
// and printed after the mnemonic.
#define VECTOR_MATRIX_DISASSEMBLE(op, text)                                      \
    VECTOR_DISASSEMBLE(op, text)                                                 \
    case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
46
Brian Osmanb08cc022020-04-02 11:38:40 -040047static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
Brian Osmanab8f3842020-04-07 09:30:44 -040048 auto inst = READ_INST();
Mike Kleina9741ee2020-04-06 08:54:47 -050049 printf("%04x ", (int)inst);
50 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -040051 VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
52 VECTOR_DISASSEMBLE(kAddI, "addi")
53 case ByteCodeInstruction::kAndB: printf("andb"); break;
54 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
55 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
56 case ByteCodeInstruction::kCallExternal: {
57 int argumentCount = READ8();
58 int returnCount = READ8();
59 int externalValue = READ8();
60 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
61 break;
62 }
63 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
64 VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
65 VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
66 VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
67 VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
68 VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
69 VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
70 VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
71 VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
72 VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
73 VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
74 VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
75 VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
76 VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
77 VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
78 VECTOR_DISASSEMBLE(kCompareULT, "compareult")
79 VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
Brian Osmanab8f3842020-04-07 09:30:44 -040080 VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
81 VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
82 VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
Brian Osmanb08cc022020-04-02 11:38:40 -040083 VECTOR_DISASSEMBLE(kCos, "cos")
84 VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
85 VECTOR_DISASSEMBLE(kDivideS, "divideS")
86 VECTOR_DISASSEMBLE(kDivideU, "divideu")
87 VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
88 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
89 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
90 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -040091 case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
92 case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
93 case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
94 case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
95 case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
96 case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
97 case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
98 case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
99 case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
100 case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
101 case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
102 case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400103 case ByteCodeInstruction::kLoadSwizzle: {
104 int target = READ8();
105 int count = READ8();
106 printf("loadswizzle %d %d", target, count);
107 for (int i = 0; i < count; ++i) {
108 printf(", %d", READ8());
109 }
110 break;
111 }
112 case ByteCodeInstruction::kLoadSwizzleGlobal: {
113 int target = READ8();
114 int count = READ8();
115 printf("loadswizzleglobal %d %d", target, count);
116 for (int i = 0; i < count; ++i) {
117 printf(", %d", READ8());
118 }
119 break;
120 }
121 case ByteCodeInstruction::kLoadSwizzleUniform: {
122 int target = READ8();
123 int count = READ8();
124 printf("loadswizzleuniform %d %d", target, count);
125 for (int i = 0; i < count; ++i) {
126 printf(", %d", READ8());
127 }
128 break;
129 }
130 case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
131 case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
132 break;
133 case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
134 break;
135 case ByteCodeInstruction::kMatrixToMatrix: {
136 int srcCols = READ8();
137 int srcRows = READ8();
138 int dstCols = READ8();
139 int dstRows = READ8();
140 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
141 break;
142 }
143 case ByteCodeInstruction::kMatrixMultiply: {
144 int lCols = READ8();
145 int lRows = READ8();
146 int rCols = READ8();
147 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
148 break;
149 }
150 VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
151 VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
Brian Osmanab8f3842020-04-07 09:30:44 -0400152 VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
153 VECTOR_DISASSEMBLE(kNegateI, "negatei")
Brian Osmanb08cc022020-04-02 11:38:40 -0400154 case ByteCodeInstruction::kNotB: printf("notb"); break;
155 case ByteCodeInstruction::kOrB: printf("orb"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -0400156 VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
Brian Osmanb08cc022020-04-02 11:38:40 -0400157 case ByteCodeInstruction::kPushImmediate: {
158 uint32_t v = READ32();
159 union { uint32_t u; float f; } pun = { v };
160 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
161 break;
162 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400163 case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
164 case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
165 case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
166 case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400167 VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
168 VECTOR_DISASSEMBLE(kRemainderS, "remainders")
169 VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
170 case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
171 case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
172 case ByteCodeInstruction::kScalarToMatrix: {
173 int cols = READ8();
174 int rows = READ8();
175 printf("scalartomatrix %dx%d", cols, rows);
176 break;
177 }
178 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
179 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
180 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
181 VECTOR_DISASSEMBLE(kSin, "sin")
Brian Osmanab8f3842020-04-07 09:30:44 -0400182 VECTOR_DISASSEMBLE(kSqrt, "sqrt")
Brian Osmanb08cc022020-04-02 11:38:40 -0400183 case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
184 case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
185 case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
186 case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
187 case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
188 case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
189 case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
190 case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
191 case ByteCodeInstruction::kStoreSwizzle: {
192 int target = READ8();
193 int count = READ8();
194 printf("storeswizzle %d %d", target, count);
195 for (int i = 0; i < count; ++i) {
196 printf(", %d", READ8());
197 }
198 break;
199 }
200 case ByteCodeInstruction::kStoreSwizzleGlobal: {
201 int target = READ8();
202 int count = READ8();
203 printf("storeswizzleglobal %d %d", target, count);
204 for (int i = 0; i < count; ++i) {
205 printf(", %d", READ8());
206 }
207 break;
208 }
209 case ByteCodeInstruction::kStoreSwizzleIndirect: {
210 int count = READ8();
211 printf("storeswizzleindirect %d", count);
212 for (int i = 0; i < count; ++i) {
213 printf(", %d", READ8());
214 }
215 break;
216 }
217 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
218 int count = READ8();
219 printf("storeswizzleindirectglobal %d", count);
220 for (int i = 0; i < count; ++i) {
221 printf(", %d", READ8());
222 }
223 break;
224 }
225 case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
226 case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
227 break;
228 VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
229 VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
230 case ByteCodeInstruction::kSwizzle: {
231 printf("swizzle %d, ", READ8());
232 int count = READ8();
233 printf("%d", count);
234 for (int i = 0; i < count; ++i) {
235 printf(", %d", READ8());
236 }
237 break;
238 }
239 VECTOR_DISASSEMBLE(kTan, "tan")
Brian Osmanab8f3842020-04-07 09:30:44 -0400240 case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
241 case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
242 case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
243 case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400244 case ByteCodeInstruction::kXorB: printf("xorb"); break;
245 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
246 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
247 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
248 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
249 case ByteCodeInstruction::kBranchIfAllFalse:
250 printf("branchifallfalse %d", READ16());
251 break;
252 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
253 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
254 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
255 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
256 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
257 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
258 default:
Brian Osmanab8f3842020-04-07 09:30:44 -0400259 ip -= sizeof(ByteCodeInstruction);
Brian Osmanb08cc022020-04-02 11:38:40 -0400260 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
261 SkASSERT(false);
262 }
263 return ip;
264}
265
// Expands to the `case` labels base4, base3, base2 and base for a component-wise binary operator.
// Must be used inside a switch on `inst` nested in a loop (the last case ends in `continue`), with
// `sp` and POP() in scope.
//
// The cases are chained by deliberate fall-through: entering at baseK runs K steps, and each step
// combines one left-hand slot with the top of stack (sp[0]), stores the result into the left-hand
// slot, and pops once. `count` is computed from the distance between the executing opcode and
// `base`, so it is the same in every step of one instruction.
// NOTE(review): this relies on the enum layout of ByteCodeInstruction (declared outside this
// file). The hard-coded sp[-4] in the base4 case implies count is expected to be -K for the
// K-wide form - confirm against the enum declaration.
#define VECTOR_BINARY_OP(base, field, op)                             \
    case ByteCodeInstruction::base ## 4:                              \
        sp[-4] = sp[-4].field op sp[0].field;                         \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
        continue;                                                     \
    }
287
// A naive implementation of / or % using skvx operations will likely crash with a divide by zero
// in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
//
// Same case/fall-through structure as VECTOR_BINARY_OP, but each step walks the VecWidth lanes
// one at a time and applies the compound form `op=` only in lanes where mask()[i] is non-zero;
// other lanes of the destination slot are left untouched. Requires `mask` to be callable at the
// point of use, in addition to `inst`, `sp` and POP().
#define VECTOR_BINARY_MASKED_OP(base, field, op)                      \
    case ByteCodeInstruction::base ## 4:                              \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[-4].field[i] op ## = sp[0].field[i];               \
            }                                                         \
        }                                                             \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
        continue;                                                     \
    }
327
328
// VECTOR_BINARY_OP plus a `baseN` case for operands of arbitrary width. The width is read from
// the instruction stream with READ8(). Each of the `count` iterations combines sp[-count] with
// sp[0], stores into sp[-count], and pops, so after the loop the right-hand operand has been
// removed and the left-hand operand's slots hold the results.
#define VECTOR_MATRIX_BINARY_OP(base, field, op)                      \
    VECTOR_BINARY_OP(base, field, op)                                 \
    case ByteCodeInstruction::base ## N: {                            \
        int count = READ8();                                          \
        for (int i = count; i > 0; --i) {                             \
            sp[-count] = sp[-count].field op sp[0].field;             \
            POP();                                                    \
        }                                                             \
        continue;                                                     \
    }
339
// Function-call counterpart of VECTOR_BINARY_OP: identical case labels, fall-through chain and
// slot arithmetic, but each step stores fn(left.field, top.field) instead of applying an infix
// operator. Requires `inst`, `sp` and POP() in scope and an enclosing loop for `continue`.
// NOTE(review): `count` depends on the ByteCodeInstruction enum layout in the same way as in
// VECTOR_BINARY_OP - confirm against the enum declaration.
#define VECTOR_BINARY_FN(base, field, fn)                             \
    case ByteCodeInstruction::base ## 4:                              \
        sp[-4] = fn(sp[-4].field, sp[0].field);                       \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
        continue;                                                     \
    }
361
// Expands to the `case` labels base4, base3, base2 and base for a component-wise unary function.
// Each case rewrites one stack slot in place with fn(slot.field) and deliberately falls through
// to the next narrower case, so entering at baseK transforms the top K slots; nothing is pushed
// or popped. Must sit inside a loop, since the chain ends in `continue`.
// Note the parameter order is (base, fn, field), which differs from VECTOR_BINARY_FN's
// (base, field, fn).
#define VECTOR_UNARY_FN(base, fn, field)                              \
    case ByteCodeInstruction::base ## 4: sp[-3] = fn(sp[-3].field);   \
    case ByteCodeInstruction::base ## 3: sp[-2] = fn(sp[-2].field);   \
    case ByteCodeInstruction::base ## 2: sp[-1] = fn(sp[-1].field);   \
    case ByteCodeInstruction::base:      sp[ 0] = fn(sp[ 0].field);   \
        continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400368
Brian Osmanb08cc022020-04-02 11:38:40 -0400369union VValue {
370 VValue() {}
371 VValue(F32 f) : fFloat(f) {}
372 VValue(I32 s) : fSigned(s) {}
373 VValue(U32 u) : fUnsigned(u) {}
374
375 F32 fFloat;
376 I32 fSigned;
377 U32 fUnsigned;
378};
379
// Snapshot pushed onto the frame list when kCall enters a function. It is aggregate-initialized
// positionally at the call site, so the field order below must not change.
struct StackFrame {
    const uint8_t* fCode;    // start of the code that was executing when the call was made
    const uint8_t* fIP;      // instruction pointer at the time of the call (after kCall's operand)
    VValue* fStack;          // stack base that was in use when the call was made
    int fParameterCount;     // parameter count of the function being called
};
386
387static F32 VecMod(F32 a, F32 b) {
388 return a - skvx::trunc(a / b) * b;
389}
390
// Shorthand for the float view of stack slot sp[index]; usable as an lvalue. Requires a variable
// named `sp` (indexable, yielding VValue) in scope at the point of use.
#define spf(index) sp[index].fFloat
392
393static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
394 int baseIndex, I32 mask) {
395 int argumentCount = READ8();
396 int returnCount = READ8();
397 int target = READ8();
398 ExternalValue* v = byteCode->fExternalValues[target];
399 sp -= argumentCount - 1;
400
401 float tmpArgs[4];
402 float tmpReturn[4];
403 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
404 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
405
406 for (int i = 0; i < VecWidth; ++i) {
407 if (mask[i]) {
408 for (int j = 0; j < argumentCount; ++j) {
409 tmpArgs[j] = sp[j].fFloat[i];
410 }
411 v->call(baseIndex + i, tmpArgs, tmpReturn);
412 for (int j = 0; j < returnCount; ++j) {
413 sp[j].fFloat[i] = tmpReturn[j];
414 }
415 }
416 }
417 sp += returnCount - 1;
418}
419
420static void Inverse2x2(VValue* sp) {
421 F32 a = sp[-3].fFloat,
422 b = sp[-2].fFloat,
423 c = sp[-1].fFloat,
424 d = sp[ 0].fFloat;
425 F32 idet = F32(1) / (a*d - b*c);
426 sp[-3].fFloat = d * idet;
427 sp[-2].fFloat = -b * idet;
428 sp[-1].fFloat = -c * idet;
429 sp[ 0].fFloat = a * idet;
430}
431
432static void Inverse3x3(VValue* sp) {
433 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
434 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
435 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
436 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
437 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
438 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
439 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
440 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
441 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
442 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
443 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
444 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
445 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
446 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
447}
448
// Replaces the 4x4 float matrix held in the top sixteen stack slots (sp[-15] .. sp[0]) with its
// inverse, independently in every lane. The determinant is not tested against zero.
//
// Local a<g><k> is element k of the g-th run of four consecutive slots (a00 = sp[-15],
// a01 = sp[-14], ..., a33 = sp[0]).
static void Inverse4x4(VValue* sp) {
    F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
        a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
        a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
        a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);

    // 2x2 sub-determinants: b00..b05 are built from groups 0 and 1, b06..b11 from groups 2 and 3.
    F32 b00 = a00 * a11 - a01 * a10,
        b01 = a00 * a12 - a02 * a10,
        b02 = a00 * a13 - a03 * a10,
        b03 = a01 * a12 - a02 * a11,
        b04 = a01 * a13 - a03 * a11,
        b05 = a02 * a13 - a03 * a12,
        b06 = a20 * a31 - a21 * a30,
        b07 = a20 * a32 - a22 * a30,
        b08 = a20 * a33 - a23 * a30,
        b09 = a21 * a32 - a22 * a31,
        b10 = a21 * a33 - a23 * a31,
        b11 = a22 * a33 - a23 * a32;

    // Reciprocal of the full determinant, expressed through the sub-determinants.
    F32 idet = F32(1) /
               (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);

    // Fold 1/det into the sub-determinants once, so the sixteen outputs below need no further
    // scaling.
    b00 *= idet;
    b01 *= idet;
    b02 *= idet;
    b03 *= idet;
    b04 *= idet;
    b05 *= idet;
    b06 *= idet;
    b07 *= idet;
    b08 *= idet;
    b09 *= idet;
    b10 *= idet;
    b11 *= idet;

    // All inputs were copied into locals above, so writing the slots in place is safe.
    spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
    spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
    spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
    spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
    spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
    spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
    spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
    spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
    spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
    spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
    spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
    spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
    spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
    spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
    spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
    spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
}
501
502static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
503 float* outReturn[], VValue globals[], const float uniforms[],
504 bool stripedOutput, int N, int baseIndex) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400505 // Needs to be the first N non-negative integers, at least as large as VecWidth
506 static const Interpreter::I32 gLanes = {
507 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
508 };
509
510 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
511
512 #define POP() (*(sp--))
513 #define PUSH(v) (sp[1] = v, ++sp)
514
515 const uint8_t* code = f->fCode.data();
516 const uint8_t* ip = code;
517 std::vector<StackFrame> frames;
518
519 I32 condStack[16]; // Independent condition masks
520 I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
521 I32 contStack[16]; // Continue flags for loops
522 I32 loopStack[16]; // Loop execution masks
523 condStack[0] = maskStack[0] = (gLanes < N);
524 contStack[0] = I32( 0);
525 loopStack[0] = I32(~0);
526 I32* condPtr = condStack;
527 I32* maskPtr = maskStack;
528 I32* contPtr = contStack;
529 I32* loopPtr = loopStack;
530
531 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
532 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
533 return false;
534 }
535
536 auto mask = [&]() { return *maskPtr & *loopPtr; };
537
Brian Osmanb08cc022020-04-02 11:38:40 -0400538 for (;;) {
Brian Osmanab8f3842020-04-07 09:30:44 -0400539#ifdef TRACE
540 printf("at %3d ", (int) (ip - code));
541 disassemble_instruction(ip);
542 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
Brian Osmanb08cc022020-04-02 11:38:40 -0400543#endif
Brian Osmanab8f3842020-04-07 09:30:44 -0400544 ByteCodeInstruction inst = READ_INST();
545 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400546
Brian Osmanab8f3842020-04-07 09:30:44 -0400547 VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
548 VECTOR_BINARY_OP(kAddI, fSigned, +)
Brian Osmanb08cc022020-04-02 11:38:40 -0400549
Brian Osmanab8f3842020-04-07 09:30:44 -0400550 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
551 case ByteCodeInstruction::kAndB:
552 sp[-1] = sp[-1].fSigned & sp[0].fSigned;
553 POP();
554 continue;
555 case ByteCodeInstruction::kNotB:
556 sp[0] = ~sp[0].fSigned;
557 continue;
558 case ByteCodeInstruction::kOrB:
559 sp[-1] = sp[-1].fSigned | sp[0].fSigned;
560 POP();
561 continue;
562 case ByteCodeInstruction::kXorB:
563 sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
564 POP();
565 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400566
Brian Osmanab8f3842020-04-07 09:30:44 -0400567 case ByteCodeInstruction::kBranch:
568 ip = code + READ16();
569 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400570
Brian Osmanab8f3842020-04-07 09:30:44 -0400571 case ByteCodeInstruction::kCall: {
572 // Precursor code reserved space for the return value, and pushed all parameters to
573 // the stack. Update our bottom of stack to point at the first parameter, and our
574 // sp to point past those parameters (plus space for locals).
575 int target = READ8();
Mike Klein01d42b12020-04-14 15:34:53 -0500576 const ByteCodeFunction* f = byteCode->fFunctions[target].get();
Brian Osmanab8f3842020-04-07 09:30:44 -0400577 if (skvx::any(mask())) {
Mike Klein01d42b12020-04-14 15:34:53 -0500578 frames.push_back({ code, ip, stack, f->fParameterCount });
579 ip = code = f->fCode.data();
580 stack = sp - f->fParameterCount + 1;
581 sp = stack + f->fParameterCount + f->fLocalCount - 1;
582 // As we did in runStriped(), zero locals so they're safe to mask-store into.
583 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
584 stack[i].fFloat = 0.0f;
585 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400586 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400587 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400588 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400589
Brian Osmanab8f3842020-04-07 09:30:44 -0400590 case ByteCodeInstruction::kCallExternal: {
591 CallExternal(byteCode, ip, sp, baseIndex, mask());
592 continue;
593 }
594
595 case ByteCodeInstruction::kClampIndex: {
596 int length = READ8();
597 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
598 return false;
Brian Osmanb08cc022020-04-02 11:38:40 -0400599 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400600 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400601 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400602
Brian Osmanab8f3842020-04-07 09:30:44 -0400603 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
604 VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
605 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
606 VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
607 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
608 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
609 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
610 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
611 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
612 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
613 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
614 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
615 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
616 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
617 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
618 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
619
620 case ByteCodeInstruction::kConvertFtoI4: sp[-3] = skvx::cast<int>(sp[-3].fFloat);
621 case ByteCodeInstruction::kConvertFtoI3: sp[-2] = skvx::cast<int>(sp[-2].fFloat);
622 case ByteCodeInstruction::kConvertFtoI2: sp[-1] = skvx::cast<int>(sp[-1].fFloat);
623 case ByteCodeInstruction::kConvertFtoI: sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
624 continue;
625
626 case ByteCodeInstruction::kConvertStoF4: sp[-3] = skvx::cast<float>(sp[-3].fSigned);
627 case ByteCodeInstruction::kConvertStoF3: sp[-2] = skvx::cast<float>(sp[-2].fSigned);
628 case ByteCodeInstruction::kConvertStoF2: sp[-1] = skvx::cast<float>(sp[-1].fSigned);
629 case ByteCodeInstruction::kConvertStoF: sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
630 continue;
631
632 case ByteCodeInstruction::kConvertUtoF4: sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
633 case ByteCodeInstruction::kConvertUtoF3: sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
634 case ByteCodeInstruction::kConvertUtoF2: sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
635 case ByteCodeInstruction::kConvertUtoF: sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
636 continue;
637
Mike Kleinc2160252020-04-29 09:56:56 -0500638 VECTOR_UNARY_FN(kCos, skvx::cos, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -0400639
640 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
641 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
642 VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
643
644 case ByteCodeInstruction::kDup4: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
645 case ByteCodeInstruction::kDup3: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
646 case ByteCodeInstruction::kDup2: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
647 case ByteCodeInstruction::kDup : PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
648 continue;
649
650 case ByteCodeInstruction::kDupN: {
651 int count = READ8();
652 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
653 sp += count;
654 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400655 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400656
Brian Osmanab8f3842020-04-07 09:30:44 -0400657 case ByteCodeInstruction::kInverse2x2:
658 Inverse2x2(sp);
659 continue;
660 case ByteCodeInstruction::kInverse3x3:
661 Inverse3x3(sp);
662 continue;
663 case ByteCodeInstruction::kInverse4x4:
664 Inverse4x4(sp);
665 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400666
Brian Osmanab8f3842020-04-07 09:30:44 -0400667 case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3];
668 case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2];
669 case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1];
670 case ByteCodeInstruction::kLoad: sp[1] = stack[*ip + 0];
671 ++ip;
672 sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
673 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400674
Brian Osmanab8f3842020-04-07 09:30:44 -0400675 case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3];
676 case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2];
677 case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1];
678 case ByteCodeInstruction::kLoadGlobal: sp[1] = globals[*ip + 0];
679 ++ip;
680 sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
681 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400682
Brian Osmanab8f3842020-04-07 09:30:44 -0400683 case ByteCodeInstruction::kLoadUniform4: sp[4].fFloat = uniforms[*ip + 3];
684 case ByteCodeInstruction::kLoadUniform3: sp[3].fFloat = uniforms[*ip + 2];
685 case ByteCodeInstruction::kLoadUniform2: sp[2].fFloat = uniforms[*ip + 1];
686 case ByteCodeInstruction::kLoadUniform: sp[1].fFloat = uniforms[*ip + 0];
687 ++ip;
688 sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
689 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400690
Brian Osmanab8f3842020-04-07 09:30:44 -0400691 case ByteCodeInstruction::kLoadExtended: {
692 int count = READ8();
693 I32 src = POP().fSigned;
694 I32 m = mask();
695 for (int i = 0; i < count; ++i) {
696 for (int j = 0; j < VecWidth; ++j) {
697 if (m[j]) {
698 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
Brian Osmanb08cc022020-04-02 11:38:40 -0400699 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400700 }
701 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400702 sp += count;
703 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400704 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400705
Brian Osmanab8f3842020-04-07 09:30:44 -0400706 case ByteCodeInstruction::kLoadExtendedGlobal: {
707 int count = READ8();
708 I32 src = POP().fSigned;
709 I32 m = mask();
710 for (int i = 0; i < count; ++i) {
711 for (int j = 0; j < VecWidth; ++j) {
712 if (m[j]) {
713 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
714 }
715 }
716 }
717 sp += count;
718 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400719 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400720
Brian Osmanab8f3842020-04-07 09:30:44 -0400721 case ByteCodeInstruction::kLoadExtendedUniform: {
722 int count = READ8();
723 I32 src = POP().fSigned;
724 I32 m = mask();
725 for (int i = 0; i < count; ++i) {
726 for (int j = 0; j < VecWidth; ++j) {
727 if (m[j]) {
728 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
729 }
730 }
731 }
732 sp += count;
733 continue;
734 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400735
Brian Osmanab8f3842020-04-07 09:30:44 -0400736 case ByteCodeInstruction::kLoadSwizzle: {
737 int src = READ8();
738 int count = READ8();
739 for (int i = 0; i < count; ++i) {
740 PUSH(stack[src + *(ip + i)]);
741 }
742 ip += count;
743 continue;
744 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400745
Brian Osmanab8f3842020-04-07 09:30:44 -0400746 case ByteCodeInstruction::kLoadSwizzleGlobal: {
747 int src = READ8();
748 int count = READ8();
749 for (int i = 0; i < count; ++i) {
750 PUSH(globals[src + *(ip + i)]);
751 }
752 ip += count;
753 continue;
754 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400755
Brian Osmanab8f3842020-04-07 09:30:44 -0400756 case ByteCodeInstruction::kLoadSwizzleUniform: {
757 int src = READ8();
758 int count = READ8();
759 for (int i = 0; i < count; ++i) {
760 PUSH(F32(uniforms[src + *(ip + i)]));
761 }
762 ip += count;
763 continue;
764 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400765
Brian Osmanab8f3842020-04-07 09:30:44 -0400766 case ByteCodeInstruction::kMatrixToMatrix: {
767 int srcCols = READ8();
768 int srcRows = READ8();
769 int dstCols = READ8();
770 int dstRows = READ8();
771 SkASSERT(srcCols >= 2 && srcCols <= 4);
772 SkASSERT(srcRows >= 2 && srcRows <= 4);
773 SkASSERT(dstCols >= 2 && dstCols <= 4);
774 SkASSERT(dstRows >= 2 && dstRows <= 4);
775 F32 tmp[16];
776 memset(tmp, 0, sizeof(tmp));
777 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
778 for (int c = srcCols - 1; c >= 0; --c) {
779 for (int r = srcRows - 1; r >= 0; --r) {
780 tmp[c*4 + r] = POP().fFloat;
781 }
782 }
783 for (int c = 0; c < dstCols; ++c) {
784 for (int r = 0; r < dstRows; ++r) {
785 PUSH(tmp[c*4 + r]);
786 }
787 }
788 continue;
789 }
790
791 case ByteCodeInstruction::kMatrixMultiply: {
792 int lCols = READ8();
793 int lRows = READ8();
794 int rCols = READ8();
795 int rRows = lCols;
796 F32 tmp[16] = { 0.0f };
797 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
798 F32* A = B - (lCols * lRows);
799 for (int c = 0; c < rCols; ++c) {
800 for (int r = 0; r < lRows; ++r) {
801 for (int j = 0; j < lCols; ++j) {
802 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
803 }
804 }
805 }
806 sp -= (lCols * lRows) + (rCols * rRows);
807 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
808 sp += (rCols * lRows);
809 continue;
810 }
811
812 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
813 VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
814
815 case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat;
816 case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat;
817 case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat;
818 case ByteCodeInstruction::kNegateF: sp[ 0] = -sp[ 0].fFloat;
819 continue;
820
821 case ByteCodeInstruction::kNegateFN: {
822 int count = READ8();
823 for (int i = count - 1; i >= 0; --i) {
824 sp[-i] = -sp[-i].fFloat;
825 }
826 continue;
827 }
828
829 case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned;
830 case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned;
831 case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned;
832 case ByteCodeInstruction::kNegateI: sp[ 0] = -sp[ 0].fSigned;
833 continue;
834
835 case ByteCodeInstruction::kPop4: POP();
836 case ByteCodeInstruction::kPop3: POP();
837 case ByteCodeInstruction::kPop2: POP();
838 case ByteCodeInstruction::kPop: POP();
839 continue;
840
841 case ByteCodeInstruction::kPopN:
842 sp -= READ8();
843 continue;
844
845 case ByteCodeInstruction::kPushImmediate:
846 PUSH(U32(READ32()));
847 continue;
848
849 case ByteCodeInstruction::kReadExternal:
850 case ByteCodeInstruction::kReadExternal2:
851 case ByteCodeInstruction::kReadExternal3:
852 case ByteCodeInstruction::kReadExternal4: {
853 int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
854 int src = READ8();
855 float tmp[4];
856 I32 m = mask();
857 for (int i = 0; i < VecWidth; ++i) {
858 if (m[i]) {
859 byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
860 for (int j = 0; j < count; ++j) {
861 sp[j + 1].fFloat[i] = tmp[j];
862 }
863 }
864 }
865 sp += count;
866 continue;
867 }
868
869 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
870 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
871 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
872
873 case ByteCodeInstruction::kReserve:
874 sp += READ8();
875 continue;
876
877 case ByteCodeInstruction::kReturn: {
878 int count = READ8();
879 if (frames.empty()) {
880 if (outReturn) {
881 VValue* src = sp - count + 1;
882 if (stripedOutput) {
883 for (int i = 0; i < count; ++i) {
884 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
885 ++src;
886 }
887 } else {
888 float* outPtr = outReturn[0];
889 for (int i = 0; i < count; ++i) {
890 for (int j = 0; j < N; ++j) {
891 outPtr[count * j] = src->fFloat[j];
892 }
893 ++outPtr;
894 ++src;
895 }
896 }
897 }
898 return true;
899 } else {
900 // When we were called, the caller reserved stack space for their copy of our
901 // return value, then 'stack' was positioned after that, where our parameters
902 // were placed. Copy our return values to their reserved area.
903 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
904
905 // Now move the stack pointer to the end of the passed-in parameters. This odd
906 // calling convention requires the caller to pop the arguments after calling,
907 // but allows them to store any out-parameters back during that unwinding.
908 // After that sequence finishes, the return value will be the top of the stack.
909 const StackFrame& frame(frames.back());
910 sp = stack + frame.fParameterCount - 1;
911 stack = frame.fStack;
912 code = frame.fCode;
913 ip = frame.fIP;
914 frames.pop_back();
915 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400916 }
917 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400918
919 case ByteCodeInstruction::kScalarToMatrix: {
920 int cols = READ8();
921 int rows = READ8();
922 VValue v = POP();
923 for (int c = 0; c < cols; ++c) {
924 for (int r = 0; r < rows; ++r) {
925 PUSH(c == r ? v : F32(0.0f));
926 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400927 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400928 continue;
929 }
930
931 case ByteCodeInstruction::kShiftLeft:
932 sp[0] = sp[0].fSigned << READ8();
933 continue;
934 case ByteCodeInstruction::kShiftRightS:
935 sp[0] = sp[0].fSigned >> READ8();
936 continue;
937 case ByteCodeInstruction::kShiftRightU:
938 sp[0] = sp[0].fUnsigned >> READ8();
939 continue;
940
Mike Kleinc2160252020-04-29 09:56:56 -0500941 VECTOR_UNARY_FN(kSin, skvx::sin, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -0400942 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
943
944 case ByteCodeInstruction::kStore4:
945 stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
946 case ByteCodeInstruction::kStore3:
947 stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
948 case ByteCodeInstruction::kStore2:
949 stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
950 case ByteCodeInstruction::kStore:
951 stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
952 ++ip;
953 continue;
954
955 case ByteCodeInstruction::kStoreGlobal4:
956 globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
957 case ByteCodeInstruction::kStoreGlobal3:
958 globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
959 case ByteCodeInstruction::kStoreGlobal2:
960 globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
961 case ByteCodeInstruction::kStoreGlobal:
962 globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
963 ++ip;
964 continue;
965
966 case ByteCodeInstruction::kStoreExtended: {
967 int count = READ8();
968 I32 target = POP().fSigned;
969 VValue* src = sp - count + 1;
970 I32 m = mask();
971 for (int i = 0; i < count; ++i) {
972 for (int j = 0; j < VecWidth; ++j) {
973 if (m[j]) {
974 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
975 }
976 }
977 }
978 sp -= count;
979 continue;
980 }
981 case ByteCodeInstruction::kStoreExtendedGlobal: {
982 int count = READ8();
983 I32 target = POP().fSigned;
984 VValue* src = sp - count + 1;
985 I32 m = mask();
986 for (int i = 0; i < count; ++i) {
987 for (int j = 0; j < VecWidth; ++j) {
988 if (m[j]) {
989 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
990 }
991 }
992 }
993 sp -= count;
994 continue;
995 }
996
997 case ByteCodeInstruction::kStoreSwizzle: {
998 int target = READ8();
999 int count = READ8();
1000 for (int i = count - 1; i >= 0; --i) {
1001 stack[target + *(ip + i)] = skvx::if_then_else(
1002 mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
1003 }
1004 ip += count;
1005 continue;
1006 }
1007
1008 case ByteCodeInstruction::kStoreSwizzleGlobal: {
1009 int target = READ8();
1010 int count = READ8();
1011 for (int i = count - 1; i >= 0; --i) {
1012 globals[target + *(ip + i)] = skvx::if_then_else(
1013 mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
1014 }
1015 ip += count;
1016 continue;
1017 }
1018
1019 case ByteCodeInstruction::kStoreSwizzleIndirect: {
1020 int count = READ8();
1021 I32 target = POP().fSigned;
1022 I32 m = mask();
1023 for (int i = count - 1; i >= 0; --i) {
1024 I32 v = POP().fSigned;
1025 for (int j = 0; j < VecWidth; ++j) {
1026 if (m[j]) {
1027 stack[target[j] + *(ip + i)].fSigned[j] = v[j];
1028 }
1029 }
1030 }
1031 ip += count;
1032 continue;
1033 }
1034
1035 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
1036 int count = READ8();
1037 I32 target = POP().fSigned;
1038 I32 m = mask();
1039 for (int i = count - 1; i >= 0; --i) {
1040 I32 v = POP().fSigned;
1041 for (int j = 0; j < VecWidth; ++j) {
1042 if (m[j]) {
1043 globals[target[j] + *(ip + i)].fSigned[j] = v[j];
1044 }
1045 }
1046 }
1047 ip += count;
1048 continue;
1049 }
1050
1051 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
1052 VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
1053
1054 case ByteCodeInstruction::kSwizzle: {
1055 VValue tmp[4];
1056 for (int i = READ8() - 1; i >= 0; --i) {
1057 tmp[i] = POP();
1058 }
1059 for (int i = READ8() - 1; i >= 0; --i) {
1060 PUSH(tmp[READ8()]);
1061 }
1062 continue;
1063 }
1064
Mike Kleinc2160252020-04-29 09:56:56 -05001065 VECTOR_UNARY_FN(kTan, skvx::tan, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -04001066
1067 case ByteCodeInstruction::kWriteExternal4:
1068 case ByteCodeInstruction::kWriteExternal3:
1069 case ByteCodeInstruction::kWriteExternal2:
1070 case ByteCodeInstruction::kWriteExternal: {
1071 int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
1072 int target = READ8();
1073 float tmp[4];
1074 I32 m = mask();
1075 sp -= count;
1076 for (int i = 0; i < VecWidth; ++i) {
1077 if (m[i]) {
1078 for (int j = 0; j < count; ++j) {
1079 tmp[j] = sp[j + 1].fFloat[i];
1080 }
1081 byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
1082 }
1083 }
1084 continue;
1085 }
1086
1087 case ByteCodeInstruction::kMaskPush:
1088 condPtr[1] = POP().fSigned;
1089 maskPtr[1] = maskPtr[0] & condPtr[1];
1090 ++condPtr; ++maskPtr;
1091 continue;
1092 case ByteCodeInstruction::kMaskPop:
1093 --condPtr; --maskPtr;
1094 continue;
1095 case ByteCodeInstruction::kMaskNegate:
1096 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
1097 continue;
1098 case ByteCodeInstruction::kMaskBlend: {
1099 int count = READ8();
1100 I32 m = condPtr[0];
1101 --condPtr; --maskPtr;
1102 for (int i = 0; i < count; ++i) {
1103 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
1104 --sp;
1105 }
1106 continue;
1107 }
1108 case ByteCodeInstruction::kBranchIfAllFalse: {
1109 int target = READ16();
1110 if (!skvx::any(mask())) {
1111 ip = code + target;
1112 }
1113 continue;
1114 }
1115
1116 case ByteCodeInstruction::kLoopBegin:
1117 contPtr[1] = 0;
1118 loopPtr[1] = loopPtr[0];
1119 ++contPtr; ++loopPtr;
1120 continue;
1121 case ByteCodeInstruction::kLoopNext:
1122 *loopPtr |= *contPtr;
1123 *contPtr = 0;
1124 continue;
1125 case ByteCodeInstruction::kLoopMask:
1126 *loopPtr &= POP().fSigned;
1127 continue;
1128 case ByteCodeInstruction::kLoopEnd:
1129 --contPtr; --loopPtr;
1130 continue;
1131 case ByteCodeInstruction::kLoopBreak:
1132 *loopPtr &= ~mask();
1133 continue;
1134 case ByteCodeInstruction::kLoopContinue: {
1135 I32 m = mask();
1136 *contPtr |= m;
1137 *loopPtr &= ~m;
1138 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -04001139 }
1140 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001141 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001142}
1143
1144}; // class Interpreter
1145
1146#endif // SK_ENABLE_SKSL_INTERPRETER
1147
1148#undef spf
1149
1150void ByteCodeFunction::disassemble() const {
1151#if defined(SK_ENABLE_SKSL_INTERPRETER)
1152 const uint8_t* ip = fCode.data();
1153 while (ip < fCode.data() + fCode.size()) {
1154 printf("%d: ", (int)(ip - fCode.data()));
1155 ip = Interpreter::DisassembleInstruction(ip);
1156 printf("\n");
1157 }
1158#endif
1159}
1160
Brian Osmanb08cc022020-04-02 11:38:40 -04001161bool ByteCode::run(const ByteCodeFunction* f,
1162 float* args, int argCount,
1163 float* outReturn, int returnCount,
1164 const float* uniforms, int uniformCount) const {
1165#if defined(SK_ENABLE_SKSL_INTERPRETER)
1166 Interpreter::VValue stack[128];
1167 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1168 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1169 return false;
1170 }
1171
1172 if (argCount != f->fParameterCount ||
1173 returnCount != f->fReturnCount ||
1174 uniformCount != fUniformSlotCount) {
1175 return false;
1176 }
1177
1178 Interpreter::VValue globals[32];
1179 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1180 return false;
1181 }
1182
1183 // Transpose args into stack
1184 {
1185 float* src = args;
1186 float* dst = (float*)stack;
1187 for (int i = 0; i < argCount; ++i) {
1188 *dst = *src++;
1189 dst += VecWidth;
1190 }
1191 }
1192
1193 bool stripedOutput = false;
1194 float** outArray = outReturn ? &outReturn : nullptr;
1195 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
1196 return false;
1197 }
1198
1199 // Transpose out parameters back
1200 {
1201 float* dst = args;
1202 float* src = (float*)stack;
1203 for (const auto& p : f->fParameters) {
1204 if (p.fIsOutParameter) {
1205 for (int i = p.fSlotCount; i > 0; --i) {
1206 *dst++ = *src;
1207 src += VecWidth;
1208 }
1209 } else {
1210 dst += p.fSlotCount;
1211 src += p.fSlotCount * VecWidth;
1212 }
1213 }
1214 }
1215
1216 return true;
1217#else
1218 SkDEBUGFAIL("ByteCode interpreter not enabled");
1219 return false;
1220#endif
1221}
1222
1223bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
1224 float* args[], int argCount,
1225 float* outReturn[], int returnCount,
1226 const float* uniforms, int uniformCount) const {
1227#if defined(SK_ENABLE_SKSL_INTERPRETER)
1228 Interpreter::VValue stack[128];
1229 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1230 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1231 return false;
1232 }
1233
1234 if (argCount != f->fParameterCount ||
1235 returnCount != f->fReturnCount ||
1236 uniformCount != fUniformSlotCount) {
1237 return false;
1238 }
1239
1240 Interpreter::VValue globals[32];
1241 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1242 return false;
1243 }
1244
1245 // innerRun just takes outArgs, so clear it if the count is zero
1246 if (returnCount == 0) {
1247 outReturn = nullptr;
1248 }
1249
Mike Klein01d42b12020-04-14 15:34:53 -05001250 // The instructions to store to locals and globals mask in the original value,
1251 // so they technically need to be initialized (to any value).
1252 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
1253 stack[i].fFloat = 0.0f;
1254 }
1255 for (int i = 0; i < fGlobalSlotCount; i++) {
1256 globals[i].fFloat = 0.0f;
1257 }
1258
Brian Osmanb08cc022020-04-02 11:38:40 -04001259 int baseIndex = 0;
1260
1261 while (N) {
1262 int w = std::min(N, VecWidth);
1263
1264 // Copy args into stack
1265 for (int i = 0; i < argCount; ++i) {
1266 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1267 }
1268
1269 bool stripedOutput = true;
1270 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1271 baseIndex)) {
1272 return false;
1273 }
1274
1275 // Copy out parameters back
1276 int slot = 0;
1277 for (const auto& p : f->fParameters) {
1278 if (p.fIsOutParameter) {
1279 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1280 memcpy(args[i], stack + i, w * sizeof(float));
1281 }
1282 }
1283 slot += p.fSlotCount;
1284 }
1285
1286 // Step each argument pointer ahead
1287 for (int i = 0; i < argCount; ++i) {
1288 args[i] += w;
1289 }
1290 N -= w;
1291 baseIndex += w;
1292 }
1293
1294 return true;
1295#else
1296 SkDEBUGFAIL("ByteCode interpreter not enabled");
1297 return false;
1298#endif
1299}
1300
1301} // namespace SkSL
1302
1303#endif