blob: af18c2875a5dd8b49dbef5bd81c214f0295c6d63 [file] [log] [blame]
Brian Osmanb08cc022020-04-02 11:38:40 -04001/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <vector>
18
19namespace SkSL {
20
21#if defined(SK_ENABLE_SKSL_INTERPRETER)
22
23constexpr int VecWidth = ByteCode::kVecWidth;
24
25struct Interpreter {
26
27using F32 = skvx::Vec<VecWidth, float>;
28using I32 = skvx::Vec<VecWidth, int32_t>;
29using U32 = skvx::Vec<VecWidth, uint32_t>;
30
// Bytecode stream readers. Each macro expects a `const uint8_t* ip` cursor in the enclosing
// scope, yields the value found at the cursor, and leaves the cursor just past that value.
// The multi-byte readers go through sk_unaligned_load, so the stream needs no alignment.
#define READ8()  (*ip++)
#define READ16() (ip += sizeof(uint16_t), sk_unaligned_load<uint16_t>(ip - sizeof(uint16_t)))
#define READ32() (ip += sizeof(uint32_t), sk_unaligned_load<uint32_t>(ip - sizeof(uint32_t)))
// Opcodes are stored as whole ByteCodeInstruction values.
#define READ_INST()                          \
    (ip += sizeof(ByteCodeInstruction),      \
     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
Brian Osmanb08cc022020-04-02 11:38:40 -040036
// Expands to the `case` labels that print the mnemonic `text` for opcode `op` and for its
// 2-, 3- and 4-wide variants; the width digit is appended to the mnemonic.
#define VECTOR_DISASSEMBLE(op, text)          \
    case ByteCodeInstruction::op:             \
        printf(text);                         \
        break;                                \
    case ByteCodeInstruction::op##2:          \
        printf(text "2");                     \
        break;                                \
    case ByteCodeInstruction::op##3:          \
        printf(text "3");                     \
        break;                                \
    case ByteCodeInstruction::op##4:          \
        printf(text "4");                     \
        break;

// Same as VECTOR_DISASSEMBLE, plus the `N` variant, which carries a one-byte operand that is
// read from the stream and printed after the mnemonic.
#define VECTOR_MATRIX_DISASSEMBLE(op, text)   \
    VECTOR_DISASSEMBLE(op, text)              \
    case ByteCodeInstruction::op##N:          \
        printf(text "N %d", READ8());         \
        break;
46
Brian Osmanb08cc022020-04-02 11:38:40 -040047static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
Brian Osmanab8f3842020-04-07 09:30:44 -040048 auto inst = READ_INST();
Mike Kleina9741ee2020-04-06 08:54:47 -050049 printf("%04x ", (int)inst);
50 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -040051 VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
52 VECTOR_DISASSEMBLE(kAddI, "addi")
53 case ByteCodeInstruction::kAndB: printf("andb"); break;
54 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
55 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
56 case ByteCodeInstruction::kCallExternal: {
57 int argumentCount = READ8();
58 int returnCount = READ8();
59 int externalValue = READ8();
60 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
61 break;
62 }
63 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
64 VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
65 VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
66 VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
67 VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
68 VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
69 VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
70 VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
71 VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
72 VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
73 VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
74 VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
75 VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
76 VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
77 VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
78 VECTOR_DISASSEMBLE(kCompareULT, "compareult")
79 VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
Brian Osmanab8f3842020-04-07 09:30:44 -040080 VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
81 VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
82 VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
Brian Osmanb08cc022020-04-02 11:38:40 -040083 VECTOR_DISASSEMBLE(kCos, "cos")
84 VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
85 VECTOR_DISASSEMBLE(kDivideS, "divideS")
86 VECTOR_DISASSEMBLE(kDivideU, "divideu")
87 VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
88 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
89 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
90 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -040091 case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
92 case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
93 case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
94 case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
95 case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
96 case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
97 case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
98 case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
99 case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
100 case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
101 case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
102 case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400103 case ByteCodeInstruction::kLoadSwizzle: {
104 int target = READ8();
105 int count = READ8();
106 printf("loadswizzle %d %d", target, count);
107 for (int i = 0; i < count; ++i) {
108 printf(", %d", READ8());
109 }
110 break;
111 }
112 case ByteCodeInstruction::kLoadSwizzleGlobal: {
113 int target = READ8();
114 int count = READ8();
115 printf("loadswizzleglobal %d %d", target, count);
116 for (int i = 0; i < count; ++i) {
117 printf(", %d", READ8());
118 }
119 break;
120 }
121 case ByteCodeInstruction::kLoadSwizzleUniform: {
122 int target = READ8();
123 int count = READ8();
124 printf("loadswizzleuniform %d %d", target, count);
125 for (int i = 0; i < count; ++i) {
126 printf(", %d", READ8());
127 }
128 break;
129 }
130 case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
131 case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
132 break;
133 case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
134 break;
135 case ByteCodeInstruction::kMatrixToMatrix: {
136 int srcCols = READ8();
137 int srcRows = READ8();
138 int dstCols = READ8();
139 int dstRows = READ8();
140 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
141 break;
142 }
143 case ByteCodeInstruction::kMatrixMultiply: {
144 int lCols = READ8();
145 int lRows = READ8();
146 int rCols = READ8();
147 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
148 break;
149 }
150 VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
151 VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
Brian Osmanab8f3842020-04-07 09:30:44 -0400152 VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
153 VECTOR_DISASSEMBLE(kNegateI, "negatei")
Brian Osmanb08cc022020-04-02 11:38:40 -0400154 case ByteCodeInstruction::kNotB: printf("notb"); break;
155 case ByteCodeInstruction::kOrB: printf("orb"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -0400156 VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
Brian Osmanb08cc022020-04-02 11:38:40 -0400157 case ByteCodeInstruction::kPushImmediate: {
158 uint32_t v = READ32();
159 union { uint32_t u; float f; } pun = { v };
160 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
161 break;
162 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400163 case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
164 case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
165 case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
166 case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400167 VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
168 VECTOR_DISASSEMBLE(kRemainderS, "remainders")
169 VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
170 case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
171 case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
172 case ByteCodeInstruction::kScalarToMatrix: {
173 int cols = READ8();
174 int rows = READ8();
175 printf("scalartomatrix %dx%d", cols, rows);
176 break;
177 }
178 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
179 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
180 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
181 VECTOR_DISASSEMBLE(kSin, "sin")
Brian Osmanab8f3842020-04-07 09:30:44 -0400182 VECTOR_DISASSEMBLE(kSqrt, "sqrt")
Brian Osmanb08cc022020-04-02 11:38:40 -0400183 case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
184 case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
185 case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
186 case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
187 case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
188 case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
189 case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
190 case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
191 case ByteCodeInstruction::kStoreSwizzle: {
192 int target = READ8();
193 int count = READ8();
194 printf("storeswizzle %d %d", target, count);
195 for (int i = 0; i < count; ++i) {
196 printf(", %d", READ8());
197 }
198 break;
199 }
200 case ByteCodeInstruction::kStoreSwizzleGlobal: {
201 int target = READ8();
202 int count = READ8();
203 printf("storeswizzleglobal %d %d", target, count);
204 for (int i = 0; i < count; ++i) {
205 printf(", %d", READ8());
206 }
207 break;
208 }
209 case ByteCodeInstruction::kStoreSwizzleIndirect: {
210 int count = READ8();
211 printf("storeswizzleindirect %d", count);
212 for (int i = 0; i < count; ++i) {
213 printf(", %d", READ8());
214 }
215 break;
216 }
217 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
218 int count = READ8();
219 printf("storeswizzleindirectglobal %d", count);
220 for (int i = 0; i < count; ++i) {
221 printf(", %d", READ8());
222 }
223 break;
224 }
225 case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
226 case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
227 break;
228 VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
229 VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
230 case ByteCodeInstruction::kSwizzle: {
231 printf("swizzle %d, ", READ8());
232 int count = READ8();
233 printf("%d", count);
234 for (int i = 0; i < count; ++i) {
235 printf(", %d", READ8());
236 }
237 break;
238 }
239 VECTOR_DISASSEMBLE(kTan, "tan")
Brian Osmanab8f3842020-04-07 09:30:44 -0400240 case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
241 case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
242 case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
243 case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400244 case ByteCodeInstruction::kXorB: printf("xorb"); break;
245 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
246 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
247 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
248 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
249 case ByteCodeInstruction::kBranchIfAllFalse:
250 printf("branchifallfalse %d", READ16());
251 break;
252 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
253 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
254 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
255 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
256 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
257 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
258 default:
Brian Osmanab8f3842020-04-07 09:30:44 -0400259 ip -= sizeof(ByteCodeInstruction);
Brian Osmanb08cc022020-04-02 11:38:40 -0400260 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
261 SkASSERT(false);
262 }
263 return ip;
264}
265
// Emits the cases for a component-wise binary operator `op` on 1- to 4-wide operands. Each
// step combines the top-of-stack slot (a right-hand component) with a slot further down (the
// matching left-hand component), stores the result in the lower slot, pops the top slot, and
// then falls through to the next narrower case. Needs `inst`, `sp` and POP() in scope, and
// must sit inside a loop because the last case ends with `continue`.
// NOTE(review): the offset derived from `inst` is only -N for an N-wide opcode if the enum
// values run base4 < base3 < base2 < base consecutively -- confirm against the declaration
// of ByteCodeInstruction.
#define VECTOR_BINARY_OP(base, field, op)                                        \
    case ByteCodeInstruction::base ## 4:                                         \
        sp[-4] = sp[-4].field op sp[0].field;                                    \
        POP();                                                                   \
        /* fall through */                                                       \
    case ByteCodeInstruction::base ## 3: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = sp[lhs].field op sp[0].field;                                  \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base ## 2: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = sp[lhs].field op sp[0].field;                                  \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base: {                                            \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = sp[lhs].field op sp[0].field;                                  \
        POP();                                                                   \
        continue;                                                                \
    }
287
// Lane-by-lane variant of VECTOR_BINARY_OP for / and %. Applying those operators to whole
// skvx vectors would also evaluate masked-off lanes, which can hold a zero divisor and crash,
// so every lane is checked against mask() and inactive lanes are left untouched.
// Needs `inst`, `sp`, mask(), VecWidth and POP() in scope, inside a loop.
// NOTE(review): as in VECTOR_BINARY_OP, the offset derived from `inst` presumes the enum
// values run base4 < base3 < base2 < base -- confirm against ByteCodeInstruction.
#define VECTOR_BINARY_MASKED_OP(base, field, op)                                 \
    case ByteCodeInstruction::base ## 4:                                         \
        for (int lane = 0; lane < VecWidth; ++lane) {                            \
            if (!mask()[lane]) {                                                 \
                continue; /* skips this lane only */                             \
            }                                                                    \
            sp[-4].field[lane] op ## = sp[0].field[lane];                        \
        }                                                                        \
        POP();                                                                   \
        /* fall through */                                                       \
    case ByteCodeInstruction::base ## 3: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        for (int lane = 0; lane < VecWidth; ++lane) {                            \
            if (!mask()[lane]) {                                                 \
                continue;                                                        \
            }                                                                    \
            sp[lhs].field[lane] op ## = sp[0].field[lane];                       \
        }                                                                        \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base ## 2: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        for (int lane = 0; lane < VecWidth; ++lane) {                            \
            if (!mask()[lane]) {                                                 \
                continue;                                                        \
            }                                                                    \
            sp[lhs].field[lane] op ## = sp[0].field[lane];                       \
        }                                                                        \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base: {                                            \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        for (int lane = 0; lane < VecWidth; ++lane) {                            \
            if (!mask()[lane]) {                                                 \
                continue;                                                        \
            }                                                                    \
            sp[lhs].field[lane] op ## = sp[0].field[lane];                       \
        }                                                                        \
        POP();                                                                   \
        continue;                                                                \
    }
327
328
// VECTOR_BINARY_OP plus the `N` variant, whose slot count is a one-byte operand read from the
// instruction stream. Every step combines sp[-count] with sp[0] and pops, so after `count`
// steps the right-hand operand is gone and the left-hand slots hold the results.
#define VECTOR_MATRIX_BINARY_OP(base, field, op)                       \
    VECTOR_BINARY_OP(base, field, op)                                  \
    case ByteCodeInstruction::base ## N: {                             \
        const int slots = READ8();                                     \
        int remaining = slots;                                         \
        while (remaining > 0) {                                        \
            sp[-slots] = sp[-slots].field op sp[0].field;              \
            POP();                                                     \
            --remaining;                                               \
        }                                                              \
        continue;                                                      \
    }
339
// Same shape as VECTOR_BINARY_OP, but each pair of slots is combined by calling
// fn(left, right) rather than by applying an infix operator. Needs `inst`, `sp` and POP() in
// scope, inside a loop.
// NOTE(review): as in VECTOR_BINARY_OP, the offset derived from `inst` presumes the enum
// values run base4 < base3 < base2 < base -- confirm against ByteCodeInstruction.
#define VECTOR_BINARY_FN(base, field, fn)                                        \
    case ByteCodeInstruction::base ## 4:                                         \
        sp[-4] = fn(sp[-4].field, sp[0].field);                                  \
        POP();                                                                   \
        /* fall through */                                                       \
    case ByteCodeInstruction::base ## 3: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                                \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base ## 2: {                                       \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                                \
        POP();                                                                   \
    }   /* fall through */                                                       \
    case ByteCodeInstruction::base: {                                            \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;        \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                                \
        POP();                                                                   \
        continue;                                                                \
    }
361
// Applies `fn` in place to the top 1-4 stack slots, reading each slot through `field`. The
// wider cases deliberately fall through to the narrower ones, so an N-wide opcode updates
// sp[-(N-1)] .. sp[0]. The stack pointer does not move. Must sit inside a loop because the
// last case ends with `continue`.
#define VECTOR_UNARY_FN(base, fn, field)              \
    case ByteCodeInstruction::base ## 4:              \
        sp[-3] = fn(sp[-3].field);                    \
        /* fall through */                            \
    case ByteCodeInstruction::base ## 3:              \
        sp[-2] = fn(sp[-2].field);                    \
        /* fall through */                            \
    case ByteCodeInstruction::base ## 2:              \
        sp[-1] = fn(sp[-1].field);                    \
        /* fall through */                            \
    case ByteCodeInstruction::base:                   \
        sp[0] = fn(sp[0].field);                      \
        continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400368
// Applies the scalar function `fn` (e.g. cosf) to every lane of the top `count` stack slots,
// where `count` is derived from the opcode. The slots are walked as one flat run of
// VecWidth * count floats. The stack pointer does not move. Needs `inst`, `sp` and VecWidth
// in scope, and must sit inside a loop because the case ends with `continue`.
// NOTE(review): `count` is only 1..4 if the enum values run base, base2, base3, base4
// consecutively upward, which is the opposite order to the one VECTOR_BINARY_OP's offset
// formula presumes -- confirm against the declaration of ByteCodeInstruction.
#define VECTOR_UNARY_FN_VEC(base, fn)                                            \
    case ByteCodeInstruction::base ## 4:                                         \
    case ByteCodeInstruction::base ## 3:                                         \
    case ByteCodeInstruction::base ## 2:                                         \
    case ByteCodeInstruction::base: {                                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) + 1;            \
        /* Step back in whole stack slots first, then view the slots as */       \
        /* floats. Doing the arithmetic on a float* would move back by  */       \
        /* single lanes and run past sp[0] whenever count > 1.          */       \
        float* v = (float*)(sp - count + 1);                                     \
        for (int i = VecWidth * count; i > 0; --i, ++v) {                        \
            *v = fn(*v);                                                         \
        }                                                                        \
        continue;                                                                \
    }
381
Brian Osmanb08cc022020-04-02 11:38:40 -0400382union VValue {
383 VValue() {}
384 VValue(F32 f) : fFloat(f) {}
385 VValue(I32 s) : fSigned(s) {}
386 VValue(U32 u) : fUnsigned(u) {}
387
388 F32 fFloat;
389 I32 fSigned;
390 U32 fUnsigned;
391};
392
393struct StackFrame {
394 const uint8_t* fCode;
395 const uint8_t* fIP;
396 VValue* fStack;
397 int fParameterCount;
398};
399
400static F32 VecMod(F32 a, F32 b) {
401 return a - skvx::trunc(a / b) * b;
402}
403
// Shorthand for the float view of the stack slot at offset `index` from `sp`. It is usable as
// an lvalue and requires a variable named `sp` in the enclosing scope.
#define spf(index) (sp[index].fFloat)
405
406static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
407 int baseIndex, I32 mask) {
408 int argumentCount = READ8();
409 int returnCount = READ8();
410 int target = READ8();
411 ExternalValue* v = byteCode->fExternalValues[target];
412 sp -= argumentCount - 1;
413
414 float tmpArgs[4];
415 float tmpReturn[4];
416 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
417 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
418
419 for (int i = 0; i < VecWidth; ++i) {
420 if (mask[i]) {
421 for (int j = 0; j < argumentCount; ++j) {
422 tmpArgs[j] = sp[j].fFloat[i];
423 }
424 v->call(baseIndex + i, tmpArgs, tmpReturn);
425 for (int j = 0; j < returnCount; ++j) {
426 sp[j].fFloat[i] = tmpReturn[j];
427 }
428 }
429 }
430 sp += returnCount - 1;
431}
432
433static void Inverse2x2(VValue* sp) {
434 F32 a = sp[-3].fFloat,
435 b = sp[-2].fFloat,
436 c = sp[-1].fFloat,
437 d = sp[ 0].fFloat;
438 F32 idet = F32(1) / (a*d - b*c);
439 sp[-3].fFloat = d * idet;
440 sp[-2].fFloat = -b * idet;
441 sp[-1].fFloat = -c * idet;
442 sp[ 0].fFloat = a * idet;
443}
444
445static void Inverse3x3(VValue* sp) {
446 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
447 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
448 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
449 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
450 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
451 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
452 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
453 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
454 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
455 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
456 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
457 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
458 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
459 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
460}
461
462static void Inverse4x4(VValue* sp) {
463 F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
464 a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
465 a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
466 a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);
467
468 F32 b00 = a00 * a11 - a01 * a10,
469 b01 = a00 * a12 - a02 * a10,
470 b02 = a00 * a13 - a03 * a10,
471 b03 = a01 * a12 - a02 * a11,
472 b04 = a01 * a13 - a03 * a11,
473 b05 = a02 * a13 - a03 * a12,
474 b06 = a20 * a31 - a21 * a30,
475 b07 = a20 * a32 - a22 * a30,
476 b08 = a20 * a33 - a23 * a30,
477 b09 = a21 * a32 - a22 * a31,
478 b10 = a21 * a33 - a23 * a31,
479 b11 = a22 * a33 - a23 * a32;
480
481 F32 idet = F32(1) /
482 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
483
484 b00 *= idet;
485 b01 *= idet;
486 b02 *= idet;
487 b03 *= idet;
488 b04 *= idet;
489 b05 *= idet;
490 b06 *= idet;
491 b07 *= idet;
492 b08 *= idet;
493 b09 *= idet;
494 b10 *= idet;
495 b11 *= idet;
496
497 spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
498 spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
499 spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
500 spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
501 spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
502 spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
503 spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
504 spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
505 spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
506 spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
507 spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
508 spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
509 spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
510 spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
511 spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
512 spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
513}
514
515static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
516 float* outReturn[], VValue globals[], const float uniforms[],
517 bool stripedOutput, int N, int baseIndex) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400518 // Needs to be the first N non-negative integers, at least as large as VecWidth
519 static const Interpreter::I32 gLanes = {
520 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
521 };
522
523 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
524
525 #define POP() (*(sp--))
526 #define PUSH(v) (sp[1] = v, ++sp)
527
528 const uint8_t* code = f->fCode.data();
529 const uint8_t* ip = code;
530 std::vector<StackFrame> frames;
531
532 I32 condStack[16]; // Independent condition masks
533 I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
534 I32 contStack[16]; // Continue flags for loops
535 I32 loopStack[16]; // Loop execution masks
536 condStack[0] = maskStack[0] = (gLanes < N);
537 contStack[0] = I32( 0);
538 loopStack[0] = I32(~0);
539 I32* condPtr = condStack;
540 I32* maskPtr = maskStack;
541 I32* contPtr = contStack;
542 I32* loopPtr = loopStack;
543
544 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
545 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
546 return false;
547 }
548
549 auto mask = [&]() { return *maskPtr & *loopPtr; };
550
Brian Osmanb08cc022020-04-02 11:38:40 -0400551 for (;;) {
Brian Osmanab8f3842020-04-07 09:30:44 -0400552#ifdef TRACE
553 printf("at %3d ", (int) (ip - code));
554 disassemble_instruction(ip);
555 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
Brian Osmanb08cc022020-04-02 11:38:40 -0400556#endif
Brian Osmanab8f3842020-04-07 09:30:44 -0400557 ByteCodeInstruction inst = READ_INST();
558 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400559
Brian Osmanab8f3842020-04-07 09:30:44 -0400560 VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
561 VECTOR_BINARY_OP(kAddI, fSigned, +)
Brian Osmanb08cc022020-04-02 11:38:40 -0400562
Brian Osmanab8f3842020-04-07 09:30:44 -0400563 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
564 case ByteCodeInstruction::kAndB:
565 sp[-1] = sp[-1].fSigned & sp[0].fSigned;
566 POP();
567 continue;
568 case ByteCodeInstruction::kNotB:
569 sp[0] = ~sp[0].fSigned;
570 continue;
571 case ByteCodeInstruction::kOrB:
572 sp[-1] = sp[-1].fSigned | sp[0].fSigned;
573 POP();
574 continue;
575 case ByteCodeInstruction::kXorB:
576 sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
577 POP();
578 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400579
Brian Osmanab8f3842020-04-07 09:30:44 -0400580 case ByteCodeInstruction::kBranch:
581 ip = code + READ16();
582 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400583
Brian Osmanab8f3842020-04-07 09:30:44 -0400584 case ByteCodeInstruction::kCall: {
585 // Precursor code reserved space for the return value, and pushed all parameters to
586 // the stack. Update our bottom of stack to point at the first parameter, and our
587 // sp to point past those parameters (plus space for locals).
588 int target = READ8();
Mike Klein01d42b12020-04-14 15:34:53 -0500589 const ByteCodeFunction* f = byteCode->fFunctions[target].get();
Brian Osmanab8f3842020-04-07 09:30:44 -0400590 if (skvx::any(mask())) {
Mike Klein01d42b12020-04-14 15:34:53 -0500591 frames.push_back({ code, ip, stack, f->fParameterCount });
592 ip = code = f->fCode.data();
593 stack = sp - f->fParameterCount + 1;
594 sp = stack + f->fParameterCount + f->fLocalCount - 1;
595 // As we did in runStriped(), zero locals so they're safe to mask-store into.
596 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
597 stack[i].fFloat = 0.0f;
598 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400599 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400600 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400601 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400602
Brian Osmanab8f3842020-04-07 09:30:44 -0400603 case ByteCodeInstruction::kCallExternal: {
604 CallExternal(byteCode, ip, sp, baseIndex, mask());
605 continue;
606 }
607
608 case ByteCodeInstruction::kClampIndex: {
609 int length = READ8();
610 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
611 return false;
Brian Osmanb08cc022020-04-02 11:38:40 -0400612 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400613 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400614 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400615
Brian Osmanab8f3842020-04-07 09:30:44 -0400616 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
617 VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
618 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
619 VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
620 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
621 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
622 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
623 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
624 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
625 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
626 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
627 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
628 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
629 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
630 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
631 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
632
633 case ByteCodeInstruction::kConvertFtoI4: sp[-3] = skvx::cast<int>(sp[-3].fFloat);
634 case ByteCodeInstruction::kConvertFtoI3: sp[-2] = skvx::cast<int>(sp[-2].fFloat);
635 case ByteCodeInstruction::kConvertFtoI2: sp[-1] = skvx::cast<int>(sp[-1].fFloat);
636 case ByteCodeInstruction::kConvertFtoI: sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
637 continue;
638
639 case ByteCodeInstruction::kConvertStoF4: sp[-3] = skvx::cast<float>(sp[-3].fSigned);
640 case ByteCodeInstruction::kConvertStoF3: sp[-2] = skvx::cast<float>(sp[-2].fSigned);
641 case ByteCodeInstruction::kConvertStoF2: sp[-1] = skvx::cast<float>(sp[-1].fSigned);
642 case ByteCodeInstruction::kConvertStoF: sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
643 continue;
644
645 case ByteCodeInstruction::kConvertUtoF4: sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
646 case ByteCodeInstruction::kConvertUtoF3: sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
647 case ByteCodeInstruction::kConvertUtoF2: sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
648 case ByteCodeInstruction::kConvertUtoF: sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
649 continue;
650
651 VECTOR_UNARY_FN_VEC(kCos, cosf)
652
653 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
654 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
655 VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
656
657 case ByteCodeInstruction::kDup4: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
658 case ByteCodeInstruction::kDup3: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
659 case ByteCodeInstruction::kDup2: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
660 case ByteCodeInstruction::kDup : PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
661 continue;
662
663 case ByteCodeInstruction::kDupN: {
664 int count = READ8();
665 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
666 sp += count;
667 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400668 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400669
Brian Osmanab8f3842020-04-07 09:30:44 -0400670 case ByteCodeInstruction::kInverse2x2:
671 Inverse2x2(sp);
672 continue;
673 case ByteCodeInstruction::kInverse3x3:
674 Inverse3x3(sp);
675 continue;
676 case ByteCodeInstruction::kInverse4x4:
677 Inverse4x4(sp);
678 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400679
Brian Osmanab8f3842020-04-07 09:30:44 -0400680 case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3];
681 case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2];
682 case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1];
683 case ByteCodeInstruction::kLoad: sp[1] = stack[*ip + 0];
684 ++ip;
685 sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
686 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400687
Brian Osmanab8f3842020-04-07 09:30:44 -0400688 case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3];
689 case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2];
690 case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1];
691 case ByteCodeInstruction::kLoadGlobal: sp[1] = globals[*ip + 0];
692 ++ip;
693 sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
694 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400695
Brian Osmanab8f3842020-04-07 09:30:44 -0400696 case ByteCodeInstruction::kLoadUniform4: sp[4].fFloat = uniforms[*ip + 3];
697 case ByteCodeInstruction::kLoadUniform3: sp[3].fFloat = uniforms[*ip + 2];
698 case ByteCodeInstruction::kLoadUniform2: sp[2].fFloat = uniforms[*ip + 1];
699 case ByteCodeInstruction::kLoadUniform: sp[1].fFloat = uniforms[*ip + 0];
700 ++ip;
701 sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
702 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400703
Brian Osmanab8f3842020-04-07 09:30:44 -0400704 case ByteCodeInstruction::kLoadExtended: {
705 int count = READ8();
706 I32 src = POP().fSigned;
707 I32 m = mask();
708 for (int i = 0; i < count; ++i) {
709 for (int j = 0; j < VecWidth; ++j) {
710 if (m[j]) {
711 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
Brian Osmanb08cc022020-04-02 11:38:40 -0400712 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400713 }
714 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400715 sp += count;
716 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400717 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400718
Brian Osmanab8f3842020-04-07 09:30:44 -0400719 case ByteCodeInstruction::kLoadExtendedGlobal: {
720 int count = READ8();
721 I32 src = POP().fSigned;
722 I32 m = mask();
723 for (int i = 0; i < count; ++i) {
724 for (int j = 0; j < VecWidth; ++j) {
725 if (m[j]) {
726 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
727 }
728 }
729 }
730 sp += count;
731 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400732 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400733
Brian Osmanab8f3842020-04-07 09:30:44 -0400734 case ByteCodeInstruction::kLoadExtendedUniform: {
735 int count = READ8();
736 I32 src = POP().fSigned;
737 I32 m = mask();
738 for (int i = 0; i < count; ++i) {
739 for (int j = 0; j < VecWidth; ++j) {
740 if (m[j]) {
741 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
742 }
743 }
744 }
745 sp += count;
746 continue;
747 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400748
Brian Osmanab8f3842020-04-07 09:30:44 -0400749 case ByteCodeInstruction::kLoadSwizzle: {
750 int src = READ8();
751 int count = READ8();
752 for (int i = 0; i < count; ++i) {
753 PUSH(stack[src + *(ip + i)]);
754 }
755 ip += count;
756 continue;
757 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400758
Brian Osmanab8f3842020-04-07 09:30:44 -0400759 case ByteCodeInstruction::kLoadSwizzleGlobal: {
760 int src = READ8();
761 int count = READ8();
762 for (int i = 0; i < count; ++i) {
763 PUSH(globals[src + *(ip + i)]);
764 }
765 ip += count;
766 continue;
767 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400768
Brian Osmanab8f3842020-04-07 09:30:44 -0400769 case ByteCodeInstruction::kLoadSwizzleUniform: {
770 int src = READ8();
771 int count = READ8();
772 for (int i = 0; i < count; ++i) {
773 PUSH(F32(uniforms[src + *(ip + i)]));
774 }
775 ip += count;
776 continue;
777 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400778
Brian Osmanab8f3842020-04-07 09:30:44 -0400779 case ByteCodeInstruction::kMatrixToMatrix: {
780 int srcCols = READ8();
781 int srcRows = READ8();
782 int dstCols = READ8();
783 int dstRows = READ8();
784 SkASSERT(srcCols >= 2 && srcCols <= 4);
785 SkASSERT(srcRows >= 2 && srcRows <= 4);
786 SkASSERT(dstCols >= 2 && dstCols <= 4);
787 SkASSERT(dstRows >= 2 && dstRows <= 4);
788 F32 tmp[16];
789 memset(tmp, 0, sizeof(tmp));
790 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
791 for (int c = srcCols - 1; c >= 0; --c) {
792 for (int r = srcRows - 1; r >= 0; --r) {
793 tmp[c*4 + r] = POP().fFloat;
794 }
795 }
796 for (int c = 0; c < dstCols; ++c) {
797 for (int r = 0; r < dstRows; ++r) {
798 PUSH(tmp[c*4 + r]);
799 }
800 }
801 continue;
802 }
803
804 case ByteCodeInstruction::kMatrixMultiply: {
805 int lCols = READ8();
806 int lRows = READ8();
807 int rCols = READ8();
808 int rRows = lCols;
809 F32 tmp[16] = { 0.0f };
810 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
811 F32* A = B - (lCols * lRows);
812 for (int c = 0; c < rCols; ++c) {
813 for (int r = 0; r < lRows; ++r) {
814 for (int j = 0; j < lCols; ++j) {
815 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
816 }
817 }
818 }
819 sp -= (lCols * lRows) + (rCols * rRows);
820 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
821 sp += (rCols * lRows);
822 continue;
823 }
824
825 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
826 VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
827
828 case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat;
829 case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat;
830 case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat;
831 case ByteCodeInstruction::kNegateF: sp[ 0] = -sp[ 0].fFloat;
832 continue;
833
834 case ByteCodeInstruction::kNegateFN: {
835 int count = READ8();
836 for (int i = count - 1; i >= 0; --i) {
837 sp[-i] = -sp[-i].fFloat;
838 }
839 continue;
840 }
841
842 case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned;
843 case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned;
844 case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned;
845 case ByteCodeInstruction::kNegateI: sp[ 0] = -sp[ 0].fSigned;
846 continue;
847
848 case ByteCodeInstruction::kPop4: POP();
849 case ByteCodeInstruction::kPop3: POP();
850 case ByteCodeInstruction::kPop2: POP();
851 case ByteCodeInstruction::kPop: POP();
852 continue;
853
854 case ByteCodeInstruction::kPopN:
855 sp -= READ8();
856 continue;
857
858 case ByteCodeInstruction::kPushImmediate:
859 PUSH(U32(READ32()));
860 continue;
861
862 case ByteCodeInstruction::kReadExternal:
863 case ByteCodeInstruction::kReadExternal2:
864 case ByteCodeInstruction::kReadExternal3:
865 case ByteCodeInstruction::kReadExternal4: {
866 int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
867 int src = READ8();
868 float tmp[4];
869 I32 m = mask();
870 for (int i = 0; i < VecWidth; ++i) {
871 if (m[i]) {
872 byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
873 for (int j = 0; j < count; ++j) {
874 sp[j + 1].fFloat[i] = tmp[j];
875 }
876 }
877 }
878 sp += count;
879 continue;
880 }
881
882 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
883 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
884 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
885
886 case ByteCodeInstruction::kReserve:
887 sp += READ8();
888 continue;
889
890 case ByteCodeInstruction::kReturn: {
891 int count = READ8();
892 if (frames.empty()) {
893 if (outReturn) {
894 VValue* src = sp - count + 1;
895 if (stripedOutput) {
896 for (int i = 0; i < count; ++i) {
897 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
898 ++src;
899 }
900 } else {
901 float* outPtr = outReturn[0];
902 for (int i = 0; i < count; ++i) {
903 for (int j = 0; j < N; ++j) {
904 outPtr[count * j] = src->fFloat[j];
905 }
906 ++outPtr;
907 ++src;
908 }
909 }
910 }
911 return true;
912 } else {
913 // When we were called, the caller reserved stack space for their copy of our
914 // return value, then 'stack' was positioned after that, where our parameters
915 // were placed. Copy our return values to their reserved area.
916 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
917
918 // Now move the stack pointer to the end of the passed-in parameters. This odd
919 // calling convention requires the caller to pop the arguments after calling,
920 // but allows them to store any out-parameters back during that unwinding.
921 // After that sequence finishes, the return value will be the top of the stack.
922 const StackFrame& frame(frames.back());
923 sp = stack + frame.fParameterCount - 1;
924 stack = frame.fStack;
925 code = frame.fCode;
926 ip = frame.fIP;
927 frames.pop_back();
928 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400929 }
930 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400931
932 case ByteCodeInstruction::kScalarToMatrix: {
933 int cols = READ8();
934 int rows = READ8();
935 VValue v = POP();
936 for (int c = 0; c < cols; ++c) {
937 for (int r = 0; r < rows; ++r) {
938 PUSH(c == r ? v : F32(0.0f));
939 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400940 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400941 continue;
942 }
943
944 case ByteCodeInstruction::kShiftLeft:
945 sp[0] = sp[0].fSigned << READ8();
946 continue;
947 case ByteCodeInstruction::kShiftRightS:
948 sp[0] = sp[0].fSigned >> READ8();
949 continue;
950 case ByteCodeInstruction::kShiftRightU:
951 sp[0] = sp[0].fUnsigned >> READ8();
952 continue;
953
954 VECTOR_UNARY_FN_VEC(kSin, sinf)
955 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
956
957 case ByteCodeInstruction::kStore4:
958 stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
959 case ByteCodeInstruction::kStore3:
960 stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
961 case ByteCodeInstruction::kStore2:
962 stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
963 case ByteCodeInstruction::kStore:
964 stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
965 ++ip;
966 continue;
967
968 case ByteCodeInstruction::kStoreGlobal4:
969 globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
970 case ByteCodeInstruction::kStoreGlobal3:
971 globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
972 case ByteCodeInstruction::kStoreGlobal2:
973 globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
974 case ByteCodeInstruction::kStoreGlobal:
975 globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
976 ++ip;
977 continue;
978
979 case ByteCodeInstruction::kStoreExtended: {
980 int count = READ8();
981 I32 target = POP().fSigned;
982 VValue* src = sp - count + 1;
983 I32 m = mask();
984 for (int i = 0; i < count; ++i) {
985 for (int j = 0; j < VecWidth; ++j) {
986 if (m[j]) {
987 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
988 }
989 }
990 }
991 sp -= count;
992 continue;
993 }
994 case ByteCodeInstruction::kStoreExtendedGlobal: {
995 int count = READ8();
996 I32 target = POP().fSigned;
997 VValue* src = sp - count + 1;
998 I32 m = mask();
999 for (int i = 0; i < count; ++i) {
1000 for (int j = 0; j < VecWidth; ++j) {
1001 if (m[j]) {
1002 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
1003 }
1004 }
1005 }
1006 sp -= count;
1007 continue;
1008 }
1009
1010 case ByteCodeInstruction::kStoreSwizzle: {
1011 int target = READ8();
1012 int count = READ8();
1013 for (int i = count - 1; i >= 0; --i) {
1014 stack[target + *(ip + i)] = skvx::if_then_else(
1015 mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
1016 }
1017 ip += count;
1018 continue;
1019 }
1020
1021 case ByteCodeInstruction::kStoreSwizzleGlobal: {
1022 int target = READ8();
1023 int count = READ8();
1024 for (int i = count - 1; i >= 0; --i) {
1025 globals[target + *(ip + i)] = skvx::if_then_else(
1026 mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
1027 }
1028 ip += count;
1029 continue;
1030 }
1031
1032 case ByteCodeInstruction::kStoreSwizzleIndirect: {
1033 int count = READ8();
1034 I32 target = POP().fSigned;
1035 I32 m = mask();
1036 for (int i = count - 1; i >= 0; --i) {
1037 I32 v = POP().fSigned;
1038 for (int j = 0; j < VecWidth; ++j) {
1039 if (m[j]) {
1040 stack[target[j] + *(ip + i)].fSigned[j] = v[j];
1041 }
1042 }
1043 }
1044 ip += count;
1045 continue;
1046 }
1047
1048 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
1049 int count = READ8();
1050 I32 target = POP().fSigned;
1051 I32 m = mask();
1052 for (int i = count - 1; i >= 0; --i) {
1053 I32 v = POP().fSigned;
1054 for (int j = 0; j < VecWidth; ++j) {
1055 if (m[j]) {
1056 globals[target[j] + *(ip + i)].fSigned[j] = v[j];
1057 }
1058 }
1059 }
1060 ip += count;
1061 continue;
1062 }
1063
1064 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
1065 VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
1066
1067 case ByteCodeInstruction::kSwizzle: {
1068 VValue tmp[4];
1069 for (int i = READ8() - 1; i >= 0; --i) {
1070 tmp[i] = POP();
1071 }
1072 for (int i = READ8() - 1; i >= 0; --i) {
1073 PUSH(tmp[READ8()]);
1074 }
1075 continue;
1076 }
1077
1078 VECTOR_UNARY_FN_VEC(kTan, tanf)
1079
1080 case ByteCodeInstruction::kWriteExternal4:
1081 case ByteCodeInstruction::kWriteExternal3:
1082 case ByteCodeInstruction::kWriteExternal2:
1083 case ByteCodeInstruction::kWriteExternal: {
1084 int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
1085 int target = READ8();
1086 float tmp[4];
1087 I32 m = mask();
1088 sp -= count;
1089 for (int i = 0; i < VecWidth; ++i) {
1090 if (m[i]) {
1091 for (int j = 0; j < count; ++j) {
1092 tmp[j] = sp[j + 1].fFloat[i];
1093 }
1094 byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
1095 }
1096 }
1097 continue;
1098 }
1099
1100 case ByteCodeInstruction::kMaskPush:
1101 condPtr[1] = POP().fSigned;
1102 maskPtr[1] = maskPtr[0] & condPtr[1];
1103 ++condPtr; ++maskPtr;
1104 continue;
1105 case ByteCodeInstruction::kMaskPop:
1106 --condPtr; --maskPtr;
1107 continue;
1108 case ByteCodeInstruction::kMaskNegate:
1109 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
1110 continue;
1111 case ByteCodeInstruction::kMaskBlend: {
1112 int count = READ8();
1113 I32 m = condPtr[0];
1114 --condPtr; --maskPtr;
1115 for (int i = 0; i < count; ++i) {
1116 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
1117 --sp;
1118 }
1119 continue;
1120 }
1121 case ByteCodeInstruction::kBranchIfAllFalse: {
1122 int target = READ16();
1123 if (!skvx::any(mask())) {
1124 ip = code + target;
1125 }
1126 continue;
1127 }
1128
1129 case ByteCodeInstruction::kLoopBegin:
1130 contPtr[1] = 0;
1131 loopPtr[1] = loopPtr[0];
1132 ++contPtr; ++loopPtr;
1133 continue;
1134 case ByteCodeInstruction::kLoopNext:
1135 *loopPtr |= *contPtr;
1136 *contPtr = 0;
1137 continue;
1138 case ByteCodeInstruction::kLoopMask:
1139 *loopPtr &= POP().fSigned;
1140 continue;
1141 case ByteCodeInstruction::kLoopEnd:
1142 --contPtr; --loopPtr;
1143 continue;
1144 case ByteCodeInstruction::kLoopBreak:
1145 *loopPtr &= ~mask();
1146 continue;
1147 case ByteCodeInstruction::kLoopContinue: {
1148 I32 m = mask();
1149 *contPtr |= m;
1150 *loopPtr &= ~m;
1151 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -04001152 }
1153 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001154 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001155}
1156
1157}; // class Interpreter
1158
1159#endif // SK_ENABLE_SKSL_INTERPRETER
1160
1161#undef spf
1162
1163void ByteCodeFunction::disassemble() const {
1164#if defined(SK_ENABLE_SKSL_INTERPRETER)
1165 const uint8_t* ip = fCode.data();
1166 while (ip < fCode.data() + fCode.size()) {
1167 printf("%d: ", (int)(ip - fCode.data()));
1168 ip = Interpreter::DisassembleInstruction(ip);
1169 printf("\n");
1170 }
1171#endif
1172}
1173
Brian Osmanb08cc022020-04-02 11:38:40 -04001174bool ByteCode::run(const ByteCodeFunction* f,
1175 float* args, int argCount,
1176 float* outReturn, int returnCount,
1177 const float* uniforms, int uniformCount) const {
1178#if defined(SK_ENABLE_SKSL_INTERPRETER)
1179 Interpreter::VValue stack[128];
1180 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1181 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1182 return false;
1183 }
1184
1185 if (argCount != f->fParameterCount ||
1186 returnCount != f->fReturnCount ||
1187 uniformCount != fUniformSlotCount) {
1188 return false;
1189 }
1190
1191 Interpreter::VValue globals[32];
1192 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1193 return false;
1194 }
1195
1196 // Transpose args into stack
1197 {
1198 float* src = args;
1199 float* dst = (float*)stack;
1200 for (int i = 0; i < argCount; ++i) {
1201 *dst = *src++;
1202 dst += VecWidth;
1203 }
1204 }
1205
1206 bool stripedOutput = false;
1207 float** outArray = outReturn ? &outReturn : nullptr;
1208 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
1209 return false;
1210 }
1211
1212 // Transpose out parameters back
1213 {
1214 float* dst = args;
1215 float* src = (float*)stack;
1216 for (const auto& p : f->fParameters) {
1217 if (p.fIsOutParameter) {
1218 for (int i = p.fSlotCount; i > 0; --i) {
1219 *dst++ = *src;
1220 src += VecWidth;
1221 }
1222 } else {
1223 dst += p.fSlotCount;
1224 src += p.fSlotCount * VecWidth;
1225 }
1226 }
1227 }
1228
1229 return true;
1230#else
1231 SkDEBUGFAIL("ByteCode interpreter not enabled");
1232 return false;
1233#endif
1234}
1235
1236bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
1237 float* args[], int argCount,
1238 float* outReturn[], int returnCount,
1239 const float* uniforms, int uniformCount) const {
1240#if defined(SK_ENABLE_SKSL_INTERPRETER)
1241 Interpreter::VValue stack[128];
1242 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1243 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1244 return false;
1245 }
1246
1247 if (argCount != f->fParameterCount ||
1248 returnCount != f->fReturnCount ||
1249 uniformCount != fUniformSlotCount) {
1250 return false;
1251 }
1252
1253 Interpreter::VValue globals[32];
1254 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1255 return false;
1256 }
1257
1258 // innerRun just takes outArgs, so clear it if the count is zero
1259 if (returnCount == 0) {
1260 outReturn = nullptr;
1261 }
1262
Mike Klein01d42b12020-04-14 15:34:53 -05001263 // The instructions to store to locals and globals mask in the original value,
1264 // so they technically need to be initialized (to any value).
1265 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
1266 stack[i].fFloat = 0.0f;
1267 }
1268 for (int i = 0; i < fGlobalSlotCount; i++) {
1269 globals[i].fFloat = 0.0f;
1270 }
1271
Brian Osmanb08cc022020-04-02 11:38:40 -04001272 int baseIndex = 0;
1273
1274 while (N) {
1275 int w = std::min(N, VecWidth);
1276
1277 // Copy args into stack
1278 for (int i = 0; i < argCount; ++i) {
1279 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1280 }
1281
1282 bool stripedOutput = true;
1283 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1284 baseIndex)) {
1285 return false;
1286 }
1287
1288 // Copy out parameters back
1289 int slot = 0;
1290 for (const auto& p : f->fParameters) {
1291 if (p.fIsOutParameter) {
1292 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1293 memcpy(args[i], stack + i, w * sizeof(float));
1294 }
1295 }
1296 slot += p.fSlotCount;
1297 }
1298
1299 // Step each argument pointer ahead
1300 for (int i = 0; i < argCount; ++i) {
1301 args[i] += w;
1302 }
1303 N -= w;
1304 baseIndex += w;
1305 }
1306
1307 return true;
1308#else
1309 SkDEBUGFAIL("ByteCode interpreter not enabled");
1310 return false;
1311#endif
1312}
1313
1314} // namespace SkSL
1315
1316#endif