blob: 99f08452d2c8a6fa4eaaacc7c464ddb9211fdacf [file] [log] [blame]
/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <vector>
18
19namespace SkSL {
20
21#if defined(SK_ENABLE_SKSL_INTERPRETER)
22
23constexpr int VecWidth = ByteCode::kVecWidth;
24
25struct Interpreter {
26
27using F32 = skvx::Vec<VecWidth, float>;
28using I32 = skvx::Vec<VecWidth, int32_t>;
29using U32 = skvx::Vec<VecWidth, uint32_t>;
30
31#define READ8() (*(ip++))
32#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
33#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
Brian Osmanab8f3842020-04-07 09:30:44 -040034#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
35 sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
Brian Osmanb08cc022020-04-02 11:38:40 -040036
// Expands to the `case` labels that print the mnemonic of an opcode and of its 2-, 3- and
// 4-wide variants. The wide variants print `text` with the width appended.
#define VECTOR_DISASSEMBLE(op, text)                            \
    case ByteCodeInstruction::op:                               \
        printf(text);                                           \
        break;                                                  \
    case ByteCodeInstruction::op##2:                            \
        printf(text "2");                                       \
        break;                                                  \
    case ByteCodeInstruction::op##3:                            \
        printf(text "3");                                       \
        break;                                                  \
    case ByteCodeInstruction::op##4:                            \
        printf(text "4");                                       \
        break;

// Same as VECTOR_DISASSEMBLE, plus the `N` variant, which is followed in the bytecode by a
// one-byte operand (read via READ8) that is printed after the mnemonic.
#define VECTOR_MATRIX_DISASSEMBLE(op, text)                     \
    VECTOR_DISASSEMBLE(op, text)                                \
    case ByteCodeInstruction::op##N:                            \
        printf(text "N %d", READ8());                           \
        break;
46
Brian Osmanb08cc022020-04-02 11:38:40 -040047static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
Brian Osmanab8f3842020-04-07 09:30:44 -040048 auto inst = READ_INST();
Mike Kleina9741ee2020-04-06 08:54:47 -050049 printf("%04x ", (int)inst);
50 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -040051 VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
52 VECTOR_DISASSEMBLE(kAddI, "addi")
53 case ByteCodeInstruction::kAndB: printf("andb"); break;
54 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
55 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
56 case ByteCodeInstruction::kCallExternal: {
57 int argumentCount = READ8();
58 int returnCount = READ8();
59 int externalValue = READ8();
60 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
61 break;
62 }
63 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
64 VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
65 VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
66 VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
67 VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
68 VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
69 VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
70 VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
71 VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
72 VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
73 VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
74 VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
75 VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
76 VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
77 VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
78 VECTOR_DISASSEMBLE(kCompareULT, "compareult")
79 VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
Brian Osmanab8f3842020-04-07 09:30:44 -040080 VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
81 VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
82 VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
Brian Osmanb08cc022020-04-02 11:38:40 -040083 VECTOR_DISASSEMBLE(kCos, "cos")
84 VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
85 VECTOR_DISASSEMBLE(kDivideS, "divideS")
86 VECTOR_DISASSEMBLE(kDivideU, "divideu")
87 VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
88 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
89 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
90 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -040091 case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
92 case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
93 case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
94 case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
95 case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
96 case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
97 case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
98 case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
99 case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
100 case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
101 case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
102 case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400103 case ByteCodeInstruction::kLoadSwizzle: {
104 int target = READ8();
105 int count = READ8();
106 printf("loadswizzle %d %d", target, count);
107 for (int i = 0; i < count; ++i) {
108 printf(", %d", READ8());
109 }
110 break;
111 }
112 case ByteCodeInstruction::kLoadSwizzleGlobal: {
113 int target = READ8();
114 int count = READ8();
115 printf("loadswizzleglobal %d %d", target, count);
116 for (int i = 0; i < count; ++i) {
117 printf(", %d", READ8());
118 }
119 break;
120 }
121 case ByteCodeInstruction::kLoadSwizzleUniform: {
122 int target = READ8();
123 int count = READ8();
124 printf("loadswizzleuniform %d %d", target, count);
125 for (int i = 0; i < count; ++i) {
126 printf(", %d", READ8());
127 }
128 break;
129 }
130 case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
131 case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
132 break;
133 case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
134 break;
135 case ByteCodeInstruction::kMatrixToMatrix: {
136 int srcCols = READ8();
137 int srcRows = READ8();
138 int dstCols = READ8();
139 int dstRows = READ8();
140 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
141 break;
142 }
143 case ByteCodeInstruction::kMatrixMultiply: {
144 int lCols = READ8();
145 int lRows = READ8();
146 int rCols = READ8();
147 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
148 break;
149 }
150 VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
151 VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
Brian Osmanab8f3842020-04-07 09:30:44 -0400152 VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
153 VECTOR_DISASSEMBLE(kNegateI, "negatei")
Brian Osmanb08cc022020-04-02 11:38:40 -0400154 case ByteCodeInstruction::kNotB: printf("notb"); break;
155 case ByteCodeInstruction::kOrB: printf("orb"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -0400156 VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
Brian Osmanb08cc022020-04-02 11:38:40 -0400157 case ByteCodeInstruction::kPushImmediate: {
158 uint32_t v = READ32();
159 union { uint32_t u; float f; } pun = { v };
160 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
161 break;
162 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400163 case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
164 case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
165 case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
166 case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400167 VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
168 VECTOR_DISASSEMBLE(kRemainderS, "remainders")
169 VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
170 case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
171 case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
172 case ByteCodeInstruction::kScalarToMatrix: {
173 int cols = READ8();
174 int rows = READ8();
175 printf("scalartomatrix %dx%d", cols, rows);
176 break;
177 }
178 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
179 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
180 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
181 VECTOR_DISASSEMBLE(kSin, "sin")
Brian Osmanab8f3842020-04-07 09:30:44 -0400182 VECTOR_DISASSEMBLE(kSqrt, "sqrt")
Brian Osmanb08cc022020-04-02 11:38:40 -0400183 case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
184 case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
185 case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
186 case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
187 case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
188 case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
189 case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
190 case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
191 case ByteCodeInstruction::kStoreSwizzle: {
192 int target = READ8();
193 int count = READ8();
194 printf("storeswizzle %d %d", target, count);
195 for (int i = 0; i < count; ++i) {
196 printf(", %d", READ8());
197 }
198 break;
199 }
200 case ByteCodeInstruction::kStoreSwizzleGlobal: {
201 int target = READ8();
202 int count = READ8();
203 printf("storeswizzleglobal %d %d", target, count);
204 for (int i = 0; i < count; ++i) {
205 printf(", %d", READ8());
206 }
207 break;
208 }
209 case ByteCodeInstruction::kStoreSwizzleIndirect: {
210 int count = READ8();
211 printf("storeswizzleindirect %d", count);
212 for (int i = 0; i < count; ++i) {
213 printf(", %d", READ8());
214 }
215 break;
216 }
217 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
218 int count = READ8();
219 printf("storeswizzleindirectglobal %d", count);
220 for (int i = 0; i < count; ++i) {
221 printf(", %d", READ8());
222 }
223 break;
224 }
225 case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
226 case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
227 break;
228 VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
229 VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
230 case ByteCodeInstruction::kSwizzle: {
231 printf("swizzle %d, ", READ8());
232 int count = READ8();
233 printf("%d", count);
234 for (int i = 0; i < count; ++i) {
235 printf(", %d", READ8());
236 }
237 break;
238 }
239 VECTOR_DISASSEMBLE(kTan, "tan")
Brian Osmanab8f3842020-04-07 09:30:44 -0400240 case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
241 case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
242 case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
243 case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400244 case ByteCodeInstruction::kXorB: printf("xorb"); break;
245 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
246 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
247 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
248 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
249 case ByteCodeInstruction::kBranchIfAllFalse:
250 printf("branchifallfalse %d", READ16());
251 break;
252 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
253 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
254 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
255 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
256 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
257 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
258 default:
Brian Osmanab8f3842020-04-07 09:30:44 -0400259 ip -= sizeof(ByteCodeInstruction);
Brian Osmanb08cc022020-04-02 11:38:40 -0400260 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
261 SkASSERT(false);
262 }
263 return ip;
264}
265
// Expands to the `case` labels for a lane-wise binary operator on 1- to 4-wide operands.
// Each step stores `left op right` into a left-operand slot, using the top of the stack as the
// right operand, and then pops; wider variants fall through to the narrower ones so an N-wide
// op performs N steps. Needs `inst`, `sp` and POP() in scope, and an enclosing loop for the
// final `continue`.
// NOTE(review): `lhs` only addresses the left operand if base##N has the enum value
// base - (N - 1). VECTOR_UNARY_FN_VEC in this file assumes the opposite ordering -- confirm
// against the ByteCodeInstruction declaration.
#define VECTOR_BINARY_OP(base, field, op)                                    \
    case ByteCodeInstruction::base ## 4:                                     \
        sp[-4] = sp[-4].field op sp[0].field;                                \
        POP();                                                               \
        /* fall through */                                                   \
    case ByteCodeInstruction::base ## 3: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = sp[lhs].field op sp[0].field;                              \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base ## 2: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = sp[lhs].field op sp[0].field;                              \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base: {                                        \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = sp[lhs].field op sp[0].field;                              \
        POP();                                                               \
        continue;                                                            \
    }
287
// Lane-by-lane variant of VECTOR_BINARY_OP for / and %. A naive implementation using skvx
// operations would likely crash with a divide by zero in inactive vector lanes, so the
// operator is applied (as `op=`) only to lanes that are enabled in mask().
// NOTE(review): as in VECTOR_BINARY_OP, `lhs` only addresses the left operand if base##N has
// the enum value base - (N - 1) -- confirm against the ByteCodeInstruction declaration.
#define VECTOR_BINARY_MASKED_OP(base, field, op)                             \
    case ByteCodeInstruction::base ## 4:                                     \
        for (int lane = 0; lane < VecWidth; ++lane) {                        \
            if (mask()[lane]) {                                              \
                sp[-4].field[lane] op ## = sp[0].field[lane];                \
            }                                                                \
        }                                                                    \
        POP();                                                               \
        /* fall through */                                                   \
    case ByteCodeInstruction::base ## 3: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        for (int lane = 0; lane < VecWidth; ++lane) {                        \
            if (mask()[lane]) {                                              \
                sp[lhs].field[lane] op ## = sp[0].field[lane];               \
            }                                                                \
        }                                                                    \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base ## 2: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        for (int lane = 0; lane < VecWidth; ++lane) {                        \
            if (mask()[lane]) {                                              \
                sp[lhs].field[lane] op ## = sp[0].field[lane];               \
            }                                                                \
        }                                                                    \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base: {                                        \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        for (int lane = 0; lane < VecWidth; ++lane) {                        \
            if (mask()[lane]) {                                              \
                sp[lhs].field[lane] op ## = sp[0].field[lane];               \
            }                                                                \
        }                                                                    \
        POP();                                                               \
        continue;                                                            \
    }
327
328
// VECTOR_BINARY_OP plus the `N` variant, whose width is a one-byte operand in the bytecode.
// For the N variant the left operand's matching slot is always `width` slots below the top:
// each step combines the top slot into it and pops, repeated `width` times.
#define VECTOR_MATRIX_BINARY_OP(base, field, op)                             \
    VECTOR_BINARY_OP(base, field, op)                                        \
    case ByteCodeInstruction::base ## N: {                                   \
        const int width = READ8();                                           \
        for (int remaining = width; remaining > 0; --remaining) {            \
            sp[-width] = sp[-width].field op sp[0].field;                    \
            POP();                                                           \
        }                                                                    \
        continue;                                                            \
    }
339
// Same shape as VECTOR_BINARY_OP, but combines the operands by calling `fn(left, right)`
// instead of applying an infix operator.
// NOTE(review): as in VECTOR_BINARY_OP, `lhs` only addresses the left operand if base##N has
// the enum value base - (N - 1) -- confirm against the ByteCodeInstruction declaration.
#define VECTOR_BINARY_FN(base, field, fn)                                    \
    case ByteCodeInstruction::base ## 4:                                     \
        sp[-4] = fn(sp[-4].field, sp[0].field);                              \
        POP();                                                               \
        /* fall through */                                                   \
    case ByteCodeInstruction::base ## 3: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                            \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base ## 2: {                                   \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                            \
        POP();                                                               \
    }   /* fall through */                                                   \
    case ByteCodeInstruction::base: {                                        \
        const int lhs = (int)inst - (int)(ByteCodeInstruction::base) - 1;    \
        sp[lhs] = fn(sp[lhs].field, sp[0].field);                            \
        POP();                                                               \
        continue;                                                            \
    }
361
// Expands to the `case` labels that apply `fn` in place to the top 1-4 stack slots. The
// N-wide label starts N-1 slots below the top and falls through the narrower labels up to
// the top slot. Note the parameter order (base, fn, field) differs from VECTOR_BINARY_FN.
#define VECTOR_UNARY_FN(base, fn, field)                                     \
    case ByteCodeInstruction::base ## 4:                                     \
        sp[-3] = fn(sp[-3].field);                                           \
        /* fall through */                                                   \
    case ByteCodeInstruction::base ## 3:                                     \
        sp[-2] = fn(sp[-2].field);                                           \
        /* fall through */                                                   \
    case ByteCodeInstruction::base ## 2:                                     \
        sp[-1] = fn(sp[-1].field);                                           \
        /* fall through */                                                   \
    case ByteCodeInstruction::base:                                          \
        sp[ 0] = fn(sp[ 0].field);                                           \
        continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400368
// Expands to the `case` labels that apply the scalar function `fn` to every float lane of
// the top `count` stack slots, in place. Needs `inst`, `sp` and VecWidth in scope, and an
// enclosing loop for the `continue`.
// NOTE(review): `count` is only the operand width if base##N has the enum value
// base + (N - 1). The binary-op macros in this file assume the opposite ordering -- confirm
// against the ByteCodeInstruction declaration.
#define VECTOR_UNARY_FN_VEC(base, fn)                                        \
    case ByteCodeInstruction::base ## 4:                                     \
    case ByteCodeInstruction::base ## 3:                                     \
    case ByteCodeInstruction::base ## 2:                                     \
    case ByteCodeInstruction::base: {                                        \
        int count = (int)inst - (int)(ByteCodeInstruction::base) + 1;        \
        /* Step back in whole stack slots before viewing them as floats. */  \
        /* Doing the arithmetic on a float* would only back up by        */  \
        /* count - 1 lanes, skipping the first lanes of the operand and  */  \
        /* running past the top of the stack.                            */  \
        float* v = (float*)(sp - count + 1);                                 \
        for (int i = VecWidth * count; i > 0; --i, ++v) {                    \
            *v = fn(*v);                                                     \
        }                                                                    \
        continue;                                                            \
    }
381
Brian Osmanb08cc022020-04-02 11:38:40 -0400382union VValue {
383 VValue() {}
384 VValue(F32 f) : fFloat(f) {}
385 VValue(I32 s) : fSigned(s) {}
386 VValue(U32 u) : fUnsigned(u) {}
387
388 F32 fFloat;
389 I32 fSigned;
390 U32 fUnsigned;
391};
392
393struct StackFrame {
394 const uint8_t* fCode;
395 const uint8_t* fIP;
396 VValue* fStack;
397 int fParameterCount;
398};
399
400static F32 VecMod(F32 a, F32 b) {
401 return a - skvx::trunc(a / b) * b;
402}
403
404#define spf(index) sp[index].fFloat
405
406static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
407 int baseIndex, I32 mask) {
408 int argumentCount = READ8();
409 int returnCount = READ8();
410 int target = READ8();
411 ExternalValue* v = byteCode->fExternalValues[target];
412 sp -= argumentCount - 1;
413
414 float tmpArgs[4];
415 float tmpReturn[4];
416 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
417 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
418
419 for (int i = 0; i < VecWidth; ++i) {
420 if (mask[i]) {
421 for (int j = 0; j < argumentCount; ++j) {
422 tmpArgs[j] = sp[j].fFloat[i];
423 }
424 v->call(baseIndex + i, tmpArgs, tmpReturn);
425 for (int j = 0; j < returnCount; ++j) {
426 sp[j].fFloat[i] = tmpReturn[j];
427 }
428 }
429 }
430 sp += returnCount - 1;
431}
432
433static void Inverse2x2(VValue* sp) {
434 F32 a = sp[-3].fFloat,
435 b = sp[-2].fFloat,
436 c = sp[-1].fFloat,
437 d = sp[ 0].fFloat;
438 F32 idet = F32(1) / (a*d - b*c);
439 sp[-3].fFloat = d * idet;
440 sp[-2].fFloat = -b * idet;
441 sp[-1].fFloat = -c * idet;
442 sp[ 0].fFloat = a * idet;
443}
444
445static void Inverse3x3(VValue* sp) {
446 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
447 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
448 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
449 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
450 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
451 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
452 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
453 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
454 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
455 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
456 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
457 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
458 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
459 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
460}
461
462static void Inverse4x4(VValue* sp) {
463 F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
464 a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
465 a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
466 a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);
467
468 F32 b00 = a00 * a11 - a01 * a10,
469 b01 = a00 * a12 - a02 * a10,
470 b02 = a00 * a13 - a03 * a10,
471 b03 = a01 * a12 - a02 * a11,
472 b04 = a01 * a13 - a03 * a11,
473 b05 = a02 * a13 - a03 * a12,
474 b06 = a20 * a31 - a21 * a30,
475 b07 = a20 * a32 - a22 * a30,
476 b08 = a20 * a33 - a23 * a30,
477 b09 = a21 * a32 - a22 * a31,
478 b10 = a21 * a33 - a23 * a31,
479 b11 = a22 * a33 - a23 * a32;
480
481 F32 idet = F32(1) /
482 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
483
484 b00 *= idet;
485 b01 *= idet;
486 b02 *= idet;
487 b03 *= idet;
488 b04 *= idet;
489 b05 *= idet;
490 b06 *= idet;
491 b07 *= idet;
492 b08 *= idet;
493 b09 *= idet;
494 b10 *= idet;
495 b11 *= idet;
496
497 spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
498 spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
499 spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
500 spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
501 spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
502 spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
503 spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
504 spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
505 spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
506 spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
507 spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
508 spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
509 spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
510 spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
511 spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
512 spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
513}
514
515static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
516 float* outReturn[], VValue globals[], const float uniforms[],
517 bool stripedOutput, int N, int baseIndex) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400518 // Needs to be the first N non-negative integers, at least as large as VecWidth
519 static const Interpreter::I32 gLanes = {
520 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
521 };
522
523 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
524
525 #define POP() (*(sp--))
526 #define PUSH(v) (sp[1] = v, ++sp)
527
528 const uint8_t* code = f->fCode.data();
529 const uint8_t* ip = code;
530 std::vector<StackFrame> frames;
531
532 I32 condStack[16]; // Independent condition masks
533 I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
534 I32 contStack[16]; // Continue flags for loops
535 I32 loopStack[16]; // Loop execution masks
536 condStack[0] = maskStack[0] = (gLanes < N);
537 contStack[0] = I32( 0);
538 loopStack[0] = I32(~0);
539 I32* condPtr = condStack;
540 I32* maskPtr = maskStack;
541 I32* contPtr = contStack;
542 I32* loopPtr = loopStack;
543
544 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
545 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
546 return false;
547 }
548
549 auto mask = [&]() { return *maskPtr & *loopPtr; };
550
Brian Osmanb08cc022020-04-02 11:38:40 -0400551 for (;;) {
Brian Osmanab8f3842020-04-07 09:30:44 -0400552#ifdef TRACE
553 printf("at %3d ", (int) (ip - code));
554 disassemble_instruction(ip);
555 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
Brian Osmanb08cc022020-04-02 11:38:40 -0400556#endif
Brian Osmanab8f3842020-04-07 09:30:44 -0400557 ByteCodeInstruction inst = READ_INST();
558 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400559
Brian Osmanab8f3842020-04-07 09:30:44 -0400560 VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
561 VECTOR_BINARY_OP(kAddI, fSigned, +)
Brian Osmanb08cc022020-04-02 11:38:40 -0400562
Brian Osmanab8f3842020-04-07 09:30:44 -0400563 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
564 case ByteCodeInstruction::kAndB:
565 sp[-1] = sp[-1].fSigned & sp[0].fSigned;
566 POP();
567 continue;
568 case ByteCodeInstruction::kNotB:
569 sp[0] = ~sp[0].fSigned;
570 continue;
571 case ByteCodeInstruction::kOrB:
572 sp[-1] = sp[-1].fSigned | sp[0].fSigned;
573 POP();
574 continue;
575 case ByteCodeInstruction::kXorB:
576 sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
577 POP();
578 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400579
Brian Osmanab8f3842020-04-07 09:30:44 -0400580 case ByteCodeInstruction::kBranch:
581 ip = code + READ16();
582 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400583
Brian Osmanab8f3842020-04-07 09:30:44 -0400584 case ByteCodeInstruction::kCall: {
585 // Precursor code reserved space for the return value, and pushed all parameters to
586 // the stack. Update our bottom of stack to point at the first parameter, and our
587 // sp to point past those parameters (plus space for locals).
588 int target = READ8();
589 const ByteCodeFunction* fun = byteCode->fFunctions[target].get();
590 if (skvx::any(mask())) {
591 frames.push_back({ code, ip, stack, fun->fParameterCount });
592 ip = code = fun->fCode.data();
593 stack = sp - fun->fParameterCount + 1;
594 sp = stack + fun->fParameterCount + fun->fLocalCount - 1;
Brian Osmanb08cc022020-04-02 11:38:40 -0400595 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400596 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400597 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400598
Brian Osmanab8f3842020-04-07 09:30:44 -0400599 case ByteCodeInstruction::kCallExternal: {
600 CallExternal(byteCode, ip, sp, baseIndex, mask());
601 continue;
602 }
603
604 case ByteCodeInstruction::kClampIndex: {
605 int length = READ8();
606 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
607 return false;
Brian Osmanb08cc022020-04-02 11:38:40 -0400608 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400609 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400610 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400611
Brian Osmanab8f3842020-04-07 09:30:44 -0400612 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
613 VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
614 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
615 VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
616 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
617 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
618 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
619 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
620 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
621 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
622 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
623 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
624 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
625 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
626 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
627 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
628
629 case ByteCodeInstruction::kConvertFtoI4: sp[-3] = skvx::cast<int>(sp[-3].fFloat);
630 case ByteCodeInstruction::kConvertFtoI3: sp[-2] = skvx::cast<int>(sp[-2].fFloat);
631 case ByteCodeInstruction::kConvertFtoI2: sp[-1] = skvx::cast<int>(sp[-1].fFloat);
632 case ByteCodeInstruction::kConvertFtoI: sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
633 continue;
634
635 case ByteCodeInstruction::kConvertStoF4: sp[-3] = skvx::cast<float>(sp[-3].fSigned);
636 case ByteCodeInstruction::kConvertStoF3: sp[-2] = skvx::cast<float>(sp[-2].fSigned);
637 case ByteCodeInstruction::kConvertStoF2: sp[-1] = skvx::cast<float>(sp[-1].fSigned);
638 case ByteCodeInstruction::kConvertStoF: sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
639 continue;
640
641 case ByteCodeInstruction::kConvertUtoF4: sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
642 case ByteCodeInstruction::kConvertUtoF3: sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
643 case ByteCodeInstruction::kConvertUtoF2: sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
644 case ByteCodeInstruction::kConvertUtoF: sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
645 continue;
646
647 VECTOR_UNARY_FN_VEC(kCos, cosf)
648
649 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
650 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
651 VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
652
653 case ByteCodeInstruction::kDup4: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
654 case ByteCodeInstruction::kDup3: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
655 case ByteCodeInstruction::kDup2: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
656 case ByteCodeInstruction::kDup : PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
657 continue;
658
659 case ByteCodeInstruction::kDupN: {
660 int count = READ8();
661 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
662 sp += count;
663 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400664 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400665
Brian Osmanab8f3842020-04-07 09:30:44 -0400666 case ByteCodeInstruction::kInverse2x2:
667 Inverse2x2(sp);
668 continue;
669 case ByteCodeInstruction::kInverse3x3:
670 Inverse3x3(sp);
671 continue;
672 case ByteCodeInstruction::kInverse4x4:
673 Inverse4x4(sp);
674 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400675
Brian Osmanab8f3842020-04-07 09:30:44 -0400676 case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3];
677 case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2];
678 case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1];
679 case ByteCodeInstruction::kLoad: sp[1] = stack[*ip + 0];
680 ++ip;
681 sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
682 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400683
Brian Osmanab8f3842020-04-07 09:30:44 -0400684 case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3];
685 case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2];
686 case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1];
687 case ByteCodeInstruction::kLoadGlobal: sp[1] = globals[*ip + 0];
688 ++ip;
689 sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
690 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400691
Brian Osmanab8f3842020-04-07 09:30:44 -0400692 case ByteCodeInstruction::kLoadUniform4: sp[4].fFloat = uniforms[*ip + 3];
693 case ByteCodeInstruction::kLoadUniform3: sp[3].fFloat = uniforms[*ip + 2];
694 case ByteCodeInstruction::kLoadUniform2: sp[2].fFloat = uniforms[*ip + 1];
695 case ByteCodeInstruction::kLoadUniform: sp[1].fFloat = uniforms[*ip + 0];
696 ++ip;
697 sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
698 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400699
Brian Osmanab8f3842020-04-07 09:30:44 -0400700 case ByteCodeInstruction::kLoadExtended: {
701 int count = READ8();
702 I32 src = POP().fSigned;
703 I32 m = mask();
704 for (int i = 0; i < count; ++i) {
705 for (int j = 0; j < VecWidth; ++j) {
706 if (m[j]) {
707 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
Brian Osmanb08cc022020-04-02 11:38:40 -0400708 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400709 }
710 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400711 sp += count;
712 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400713 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400714
Brian Osmanab8f3842020-04-07 09:30:44 -0400715 case ByteCodeInstruction::kLoadExtendedGlobal: {
716 int count = READ8();
717 I32 src = POP().fSigned;
718 I32 m = mask();
719 for (int i = 0; i < count; ++i) {
720 for (int j = 0; j < VecWidth; ++j) {
721 if (m[j]) {
722 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
723 }
724 }
725 }
726 sp += count;
727 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400728 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400729
Brian Osmanab8f3842020-04-07 09:30:44 -0400730 case ByteCodeInstruction::kLoadExtendedUniform: {
731 int count = READ8();
732 I32 src = POP().fSigned;
733 I32 m = mask();
734 for (int i = 0; i < count; ++i) {
735 for (int j = 0; j < VecWidth; ++j) {
736 if (m[j]) {
737 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
738 }
739 }
740 }
741 sp += count;
742 continue;
743 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400744
Brian Osmanab8f3842020-04-07 09:30:44 -0400745 case ByteCodeInstruction::kLoadSwizzle: {
746 int src = READ8();
747 int count = READ8();
748 for (int i = 0; i < count; ++i) {
749 PUSH(stack[src + *(ip + i)]);
750 }
751 ip += count;
752 continue;
753 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400754
Brian Osmanab8f3842020-04-07 09:30:44 -0400755 case ByteCodeInstruction::kLoadSwizzleGlobal: {
756 int src = READ8();
757 int count = READ8();
758 for (int i = 0; i < count; ++i) {
759 PUSH(globals[src + *(ip + i)]);
760 }
761 ip += count;
762 continue;
763 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400764
Brian Osmanab8f3842020-04-07 09:30:44 -0400765 case ByteCodeInstruction::kLoadSwizzleUniform: {
766 int src = READ8();
767 int count = READ8();
768 for (int i = 0; i < count; ++i) {
769 PUSH(F32(uniforms[src + *(ip + i)]));
770 }
771 ip += count;
772 continue;
773 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400774
Brian Osmanab8f3842020-04-07 09:30:44 -0400775 case ByteCodeInstruction::kMatrixToMatrix: {
776 int srcCols = READ8();
777 int srcRows = READ8();
778 int dstCols = READ8();
779 int dstRows = READ8();
780 SkASSERT(srcCols >= 2 && srcCols <= 4);
781 SkASSERT(srcRows >= 2 && srcRows <= 4);
782 SkASSERT(dstCols >= 2 && dstCols <= 4);
783 SkASSERT(dstRows >= 2 && dstRows <= 4);
784 F32 tmp[16];
785 memset(tmp, 0, sizeof(tmp));
786 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
787 for (int c = srcCols - 1; c >= 0; --c) {
788 for (int r = srcRows - 1; r >= 0; --r) {
789 tmp[c*4 + r] = POP().fFloat;
790 }
791 }
792 for (int c = 0; c < dstCols; ++c) {
793 for (int r = 0; r < dstRows; ++r) {
794 PUSH(tmp[c*4 + r]);
795 }
796 }
797 continue;
798 }
799
800 case ByteCodeInstruction::kMatrixMultiply: {
801 int lCols = READ8();
802 int lRows = READ8();
803 int rCols = READ8();
804 int rRows = lCols;
805 F32 tmp[16] = { 0.0f };
806 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
807 F32* A = B - (lCols * lRows);
808 for (int c = 0; c < rCols; ++c) {
809 for (int r = 0; r < lRows; ++r) {
810 for (int j = 0; j < lCols; ++j) {
811 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
812 }
813 }
814 }
815 sp -= (lCols * lRows) + (rCols * rRows);
816 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
817 sp += (rCols * lRows);
818 continue;
819 }
820
821 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
822 VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
823
824 case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat;
825 case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat;
826 case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat;
827 case ByteCodeInstruction::kNegateF: sp[ 0] = -sp[ 0].fFloat;
828 continue;
829
830 case ByteCodeInstruction::kNegateFN: {
831 int count = READ8();
832 for (int i = count - 1; i >= 0; --i) {
833 sp[-i] = -sp[-i].fFloat;
834 }
835 continue;
836 }
837
838 case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned;
839 case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned;
840 case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned;
841 case ByteCodeInstruction::kNegateI: sp[ 0] = -sp[ 0].fSigned;
842 continue;
843
844 case ByteCodeInstruction::kPop4: POP();
845 case ByteCodeInstruction::kPop3: POP();
846 case ByteCodeInstruction::kPop2: POP();
847 case ByteCodeInstruction::kPop: POP();
848 continue;
849
850 case ByteCodeInstruction::kPopN:
851 sp -= READ8();
852 continue;
853
854 case ByteCodeInstruction::kPushImmediate:
855 PUSH(U32(READ32()));
856 continue;
857
858 case ByteCodeInstruction::kReadExternal:
859 case ByteCodeInstruction::kReadExternal2:
860 case ByteCodeInstruction::kReadExternal3:
861 case ByteCodeInstruction::kReadExternal4: {
862 int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
863 int src = READ8();
864 float tmp[4];
865 I32 m = mask();
866 for (int i = 0; i < VecWidth; ++i) {
867 if (m[i]) {
868 byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
869 for (int j = 0; j < count; ++j) {
870 sp[j + 1].fFloat[i] = tmp[j];
871 }
872 }
873 }
874 sp += count;
875 continue;
876 }
877
878 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
879 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
880 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
881
882 case ByteCodeInstruction::kReserve:
883 sp += READ8();
884 continue;
885
886 case ByteCodeInstruction::kReturn: {
887 int count = READ8();
888 if (frames.empty()) {
889 if (outReturn) {
890 VValue* src = sp - count + 1;
891 if (stripedOutput) {
892 for (int i = 0; i < count; ++i) {
893 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
894 ++src;
895 }
896 } else {
897 float* outPtr = outReturn[0];
898 for (int i = 0; i < count; ++i) {
899 for (int j = 0; j < N; ++j) {
900 outPtr[count * j] = src->fFloat[j];
901 }
902 ++outPtr;
903 ++src;
904 }
905 }
906 }
907 return true;
908 } else {
909 // When we were called, the caller reserved stack space for their copy of our
910 // return value, then 'stack' was positioned after that, where our parameters
911 // were placed. Copy our return values to their reserved area.
912 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
913
914 // Now move the stack pointer to the end of the passed-in parameters. This odd
915 // calling convention requires the caller to pop the arguments after calling,
916 // but allows them to store any out-parameters back during that unwinding.
917 // After that sequence finishes, the return value will be the top of the stack.
918 const StackFrame& frame(frames.back());
919 sp = stack + frame.fParameterCount - 1;
920 stack = frame.fStack;
921 code = frame.fCode;
922 ip = frame.fIP;
923 frames.pop_back();
924 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400925 }
926 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400927
928 case ByteCodeInstruction::kScalarToMatrix: {
929 int cols = READ8();
930 int rows = READ8();
931 VValue v = POP();
932 for (int c = 0; c < cols; ++c) {
933 for (int r = 0; r < rows; ++r) {
934 PUSH(c == r ? v : F32(0.0f));
935 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400936 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400937 continue;
938 }
939
940 case ByteCodeInstruction::kShiftLeft:
941 sp[0] = sp[0].fSigned << READ8();
942 continue;
943 case ByteCodeInstruction::kShiftRightS:
944 sp[0] = sp[0].fSigned >> READ8();
945 continue;
946 case ByteCodeInstruction::kShiftRightU:
947 sp[0] = sp[0].fUnsigned >> READ8();
948 continue;
949
950 VECTOR_UNARY_FN_VEC(kSin, sinf)
951 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
952
953 case ByteCodeInstruction::kStore4:
954 stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
955 case ByteCodeInstruction::kStore3:
956 stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
957 case ByteCodeInstruction::kStore2:
958 stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
959 case ByteCodeInstruction::kStore:
960 stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
961 ++ip;
962 continue;
963
964 case ByteCodeInstruction::kStoreGlobal4:
965 globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
966 case ByteCodeInstruction::kStoreGlobal3:
967 globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
968 case ByteCodeInstruction::kStoreGlobal2:
969 globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
970 case ByteCodeInstruction::kStoreGlobal:
971 globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
972 ++ip;
973 continue;
974
975 case ByteCodeInstruction::kStoreExtended: {
976 int count = READ8();
977 I32 target = POP().fSigned;
978 VValue* src = sp - count + 1;
979 I32 m = mask();
980 for (int i = 0; i < count; ++i) {
981 for (int j = 0; j < VecWidth; ++j) {
982 if (m[j]) {
983 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
984 }
985 }
986 }
987 sp -= count;
988 continue;
989 }
990 case ByteCodeInstruction::kStoreExtendedGlobal: {
991 int count = READ8();
992 I32 target = POP().fSigned;
993 VValue* src = sp - count + 1;
994 I32 m = mask();
995 for (int i = 0; i < count; ++i) {
996 for (int j = 0; j < VecWidth; ++j) {
997 if (m[j]) {
998 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
999 }
1000 }
1001 }
1002 sp -= count;
1003 continue;
1004 }
1005
1006 case ByteCodeInstruction::kStoreSwizzle: {
1007 int target = READ8();
1008 int count = READ8();
1009 for (int i = count - 1; i >= 0; --i) {
1010 stack[target + *(ip + i)] = skvx::if_then_else(
1011 mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
1012 }
1013 ip += count;
1014 continue;
1015 }
1016
1017 case ByteCodeInstruction::kStoreSwizzleGlobal: {
1018 int target = READ8();
1019 int count = READ8();
1020 for (int i = count - 1; i >= 0; --i) {
1021 globals[target + *(ip + i)] = skvx::if_then_else(
1022 mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
1023 }
1024 ip += count;
1025 continue;
1026 }
1027
1028 case ByteCodeInstruction::kStoreSwizzleIndirect: {
1029 int count = READ8();
1030 I32 target = POP().fSigned;
1031 I32 m = mask();
1032 for (int i = count - 1; i >= 0; --i) {
1033 I32 v = POP().fSigned;
1034 for (int j = 0; j < VecWidth; ++j) {
1035 if (m[j]) {
1036 stack[target[j] + *(ip + i)].fSigned[j] = v[j];
1037 }
1038 }
1039 }
1040 ip += count;
1041 continue;
1042 }
1043
1044 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
1045 int count = READ8();
1046 I32 target = POP().fSigned;
1047 I32 m = mask();
1048 for (int i = count - 1; i >= 0; --i) {
1049 I32 v = POP().fSigned;
1050 for (int j = 0; j < VecWidth; ++j) {
1051 if (m[j]) {
1052 globals[target[j] + *(ip + i)].fSigned[j] = v[j];
1053 }
1054 }
1055 }
1056 ip += count;
1057 continue;
1058 }
1059
1060 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
1061 VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
1062
1063 case ByteCodeInstruction::kSwizzle: {
1064 VValue tmp[4];
1065 for (int i = READ8() - 1; i >= 0; --i) {
1066 tmp[i] = POP();
1067 }
1068 for (int i = READ8() - 1; i >= 0; --i) {
1069 PUSH(tmp[READ8()]);
1070 }
1071 continue;
1072 }
1073
1074 VECTOR_UNARY_FN_VEC(kTan, tanf)
1075
1076 case ByteCodeInstruction::kWriteExternal4:
1077 case ByteCodeInstruction::kWriteExternal3:
1078 case ByteCodeInstruction::kWriteExternal2:
1079 case ByteCodeInstruction::kWriteExternal: {
1080 int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
1081 int target = READ8();
1082 float tmp[4];
1083 I32 m = mask();
1084 sp -= count;
1085 for (int i = 0; i < VecWidth; ++i) {
1086 if (m[i]) {
1087 for (int j = 0; j < count; ++j) {
1088 tmp[j] = sp[j + 1].fFloat[i];
1089 }
1090 byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
1091 }
1092 }
1093 continue;
1094 }
1095
1096 case ByteCodeInstruction::kMaskPush:
1097 condPtr[1] = POP().fSigned;
1098 maskPtr[1] = maskPtr[0] & condPtr[1];
1099 ++condPtr; ++maskPtr;
1100 continue;
1101 case ByteCodeInstruction::kMaskPop:
1102 --condPtr; --maskPtr;
1103 continue;
1104 case ByteCodeInstruction::kMaskNegate:
1105 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
1106 continue;
1107 case ByteCodeInstruction::kMaskBlend: {
1108 int count = READ8();
1109 I32 m = condPtr[0];
1110 --condPtr; --maskPtr;
1111 for (int i = 0; i < count; ++i) {
1112 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
1113 --sp;
1114 }
1115 continue;
1116 }
1117 case ByteCodeInstruction::kBranchIfAllFalse: {
1118 int target = READ16();
1119 if (!skvx::any(mask())) {
1120 ip = code + target;
1121 }
1122 continue;
1123 }
1124
1125 case ByteCodeInstruction::kLoopBegin:
1126 contPtr[1] = 0;
1127 loopPtr[1] = loopPtr[0];
1128 ++contPtr; ++loopPtr;
1129 continue;
1130 case ByteCodeInstruction::kLoopNext:
1131 *loopPtr |= *contPtr;
1132 *contPtr = 0;
1133 continue;
1134 case ByteCodeInstruction::kLoopMask:
1135 *loopPtr &= POP().fSigned;
1136 continue;
1137 case ByteCodeInstruction::kLoopEnd:
1138 --contPtr; --loopPtr;
1139 continue;
1140 case ByteCodeInstruction::kLoopBreak:
1141 *loopPtr &= ~mask();
1142 continue;
1143 case ByteCodeInstruction::kLoopContinue: {
1144 I32 m = mask();
1145 *contPtr |= m;
1146 *loopPtr &= ~m;
1147 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -04001148 }
1149 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001150 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001151}
1152
1153}; // class Interpreter
1154
1155#endif // SK_ENABLE_SKSL_INTERPRETER
1156
1157#undef spf
1158
1159void ByteCodeFunction::disassemble() const {
1160#if defined(SK_ENABLE_SKSL_INTERPRETER)
1161 const uint8_t* ip = fCode.data();
1162 while (ip < fCode.data() + fCode.size()) {
1163 printf("%d: ", (int)(ip - fCode.data()));
1164 ip = Interpreter::DisassembleInstruction(ip);
1165 printf("\n");
1166 }
1167#endif
1168}
1169
Brian Osmanb08cc022020-04-02 11:38:40 -04001170bool ByteCode::run(const ByteCodeFunction* f,
1171 float* args, int argCount,
1172 float* outReturn, int returnCount,
1173 const float* uniforms, int uniformCount) const {
1174#if defined(SK_ENABLE_SKSL_INTERPRETER)
1175 Interpreter::VValue stack[128];
1176 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1177 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1178 return false;
1179 }
1180
1181 if (argCount != f->fParameterCount ||
1182 returnCount != f->fReturnCount ||
1183 uniformCount != fUniformSlotCount) {
1184 return false;
1185 }
1186
1187 Interpreter::VValue globals[32];
1188 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1189 return false;
1190 }
1191
1192 // Transpose args into stack
1193 {
1194 float* src = args;
1195 float* dst = (float*)stack;
1196 for (int i = 0; i < argCount; ++i) {
1197 *dst = *src++;
1198 dst += VecWidth;
1199 }
1200 }
1201
1202 bool stripedOutput = false;
1203 float** outArray = outReturn ? &outReturn : nullptr;
1204 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
1205 return false;
1206 }
1207
1208 // Transpose out parameters back
1209 {
1210 float* dst = args;
1211 float* src = (float*)stack;
1212 for (const auto& p : f->fParameters) {
1213 if (p.fIsOutParameter) {
1214 for (int i = p.fSlotCount; i > 0; --i) {
1215 *dst++ = *src;
1216 src += VecWidth;
1217 }
1218 } else {
1219 dst += p.fSlotCount;
1220 src += p.fSlotCount * VecWidth;
1221 }
1222 }
1223 }
1224
1225 return true;
1226#else
1227 SkDEBUGFAIL("ByteCode interpreter not enabled");
1228 return false;
1229#endif
1230}
1231
1232bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
1233 float* args[], int argCount,
1234 float* outReturn[], int returnCount,
1235 const float* uniforms, int uniformCount) const {
1236#if defined(SK_ENABLE_SKSL_INTERPRETER)
1237 Interpreter::VValue stack[128];
1238 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1239 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1240 return false;
1241 }
1242
1243 if (argCount != f->fParameterCount ||
1244 returnCount != f->fReturnCount ||
1245 uniformCount != fUniformSlotCount) {
1246 return false;
1247 }
1248
1249 Interpreter::VValue globals[32];
1250 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1251 return false;
1252 }
1253
1254 // innerRun just takes outArgs, so clear it if the count is zero
1255 if (returnCount == 0) {
1256 outReturn = nullptr;
1257 }
1258
1259 int baseIndex = 0;
1260
1261 while (N) {
1262 int w = std::min(N, VecWidth);
1263
1264 // Copy args into stack
1265 for (int i = 0; i < argCount; ++i) {
1266 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1267 }
1268
1269 bool stripedOutput = true;
1270 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1271 baseIndex)) {
1272 return false;
1273 }
1274
1275 // Copy out parameters back
1276 int slot = 0;
1277 for (const auto& p : f->fParameters) {
1278 if (p.fIsOutParameter) {
1279 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1280 memcpy(args[i], stack + i, w * sizeof(float));
1281 }
1282 }
1283 slot += p.fSlotCount;
1284 }
1285
1286 // Step each argument pointer ahead
1287 for (int i = 0; i < argCount; ++i) {
1288 args[i] += w;
1289 }
1290 N -= w;
1291 baseIndex += w;
1292 }
1293
1294 return true;
1295#else
1296 SkDEBUGFAIL("ByteCode interpreter not enabled");
1297 return false;
1298#endif
1299}
1300
1301} // namespace SkSL
1302
1303#endif