blob: 3af0f6f879c6d2c73b4011ba1e03fe7e79ba564b [file] [log] [blame]
Brian Osmanb08cc022020-04-02 11:38:40 -04001/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <vector>
18
19namespace SkSL {
20
21#if defined(SK_ENABLE_SKSL_INTERPRETER)
22
23constexpr int VecWidth = ByteCode::kVecWidth;
24
25struct Interpreter {
26
27using F32 = skvx::Vec<VecWidth, float>;
28using I32 = skvx::Vec<VecWidth, int32_t>;
29using U32 = skvx::Vec<VecWidth, uint32_t>;
30
// Bytecode operand readers. Each macro expects a local `const uint8_t* ip` in scope,
// advances it past the operand, and evaluates to the value that was read. The multi-byte
// readers go through sk_unaligned_load, so no particular alignment of `ip` is assumed.
#define READ8() (*(ip++))
#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
                     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
Brian Osmanb08cc022020-04-02 11:38:40 -040036
// Expands to the four switch cases that print the scalar and 2/3/4-component forms of an
// instruction: `text`, `text "2"`, `text "3"` and `text "4"`.
#define VECTOR_DISASSEMBLE(op, text)                              \
    case ByteCodeInstruction::op: printf(text); break;            \
    case ByteCodeInstruction::op##2: printf(text "2"); break;     \
    case ByteCodeInstruction::op##3: printf(text "3"); break;     \
    case ByteCodeInstruction::op##4: printf(text "4"); break;

// Same as VECTOR_DISASSEMBLE, plus the `N` form, which carries a one-byte operand that is
// read from `ip` and printed after the mnemonic.
#define VECTOR_MATRIX_DISASSEMBLE(op, text)                                       \
    VECTOR_DISASSEMBLE(op, text)                                                  \
    case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
46
Brian Osmanb08cc022020-04-02 11:38:40 -040047static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
Brian Osmanab8f3842020-04-07 09:30:44 -040048 auto inst = READ_INST();
Mike Kleina9741ee2020-04-06 08:54:47 -050049 printf("%04x ", (int)inst);
50 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -040051 VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
52 VECTOR_DISASSEMBLE(kAddI, "addi")
53 case ByteCodeInstruction::kAndB: printf("andb"); break;
Mike Reed8520e762020-04-30 12:06:23 -040054 VECTOR_DISASSEMBLE(kATan, "atan")
Brian Osmanb08cc022020-04-02 11:38:40 -040055 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
56 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
57 case ByteCodeInstruction::kCallExternal: {
58 int argumentCount = READ8();
59 int returnCount = READ8();
60 int externalValue = READ8();
61 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
62 break;
63 }
64 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
65 VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
66 VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
67 VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
68 VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
69 VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
70 VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
71 VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
72 VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
73 VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
74 VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
75 VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
76 VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
77 VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
78 VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
79 VECTOR_DISASSEMBLE(kCompareULT, "compareult")
80 VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
Brian Osmanab8f3842020-04-07 09:30:44 -040081 VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
82 VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
83 VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
Brian Osmanb08cc022020-04-02 11:38:40 -040084 VECTOR_DISASSEMBLE(kCos, "cos")
85 VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
86 VECTOR_DISASSEMBLE(kDivideS, "divideS")
87 VECTOR_DISASSEMBLE(kDivideU, "divideu")
88 VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
Mike Reed8520e762020-04-30 12:06:23 -040089 VECTOR_DISASSEMBLE(kFract, "fract")
Brian Osmanb08cc022020-04-02 11:38:40 -040090 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
91 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
92 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -040093 case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
94 case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
95 case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
96 case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
97 case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
98 case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
99 case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
100 case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
101 case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
102 case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
103 case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
104 case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400105 case ByteCodeInstruction::kLoadSwizzle: {
106 int target = READ8();
107 int count = READ8();
108 printf("loadswizzle %d %d", target, count);
109 for (int i = 0; i < count; ++i) {
110 printf(", %d", READ8());
111 }
112 break;
113 }
114 case ByteCodeInstruction::kLoadSwizzleGlobal: {
115 int target = READ8();
116 int count = READ8();
117 printf("loadswizzleglobal %d %d", target, count);
118 for (int i = 0; i < count; ++i) {
119 printf(", %d", READ8());
120 }
121 break;
122 }
123 case ByteCodeInstruction::kLoadSwizzleUniform: {
124 int target = READ8();
125 int count = READ8();
126 printf("loadswizzleuniform %d %d", target, count);
127 for (int i = 0; i < count; ++i) {
128 printf(", %d", READ8());
129 }
130 break;
131 }
132 case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
133 case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
134 break;
135 case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
136 break;
137 case ByteCodeInstruction::kMatrixToMatrix: {
138 int srcCols = READ8();
139 int srcRows = READ8();
140 int dstCols = READ8();
141 int dstRows = READ8();
142 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
143 break;
144 }
145 case ByteCodeInstruction::kMatrixMultiply: {
146 int lCols = READ8();
147 int lRows = READ8();
148 int rCols = READ8();
149 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
150 break;
151 }
152 VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
153 VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
Brian Osmanab8f3842020-04-07 09:30:44 -0400154 VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
155 VECTOR_DISASSEMBLE(kNegateI, "negatei")
Brian Osmanb08cc022020-04-02 11:38:40 -0400156 case ByteCodeInstruction::kNotB: printf("notb"); break;
157 case ByteCodeInstruction::kOrB: printf("orb"); break;
Brian Osmanab8f3842020-04-07 09:30:44 -0400158 VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
Brian Osmanb08cc022020-04-02 11:38:40 -0400159 case ByteCodeInstruction::kPushImmediate: {
160 uint32_t v = READ32();
161 union { uint32_t u; float f; } pun = { v };
162 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
163 break;
164 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400165 case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
166 case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
167 case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
168 case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400169 VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
170 VECTOR_DISASSEMBLE(kRemainderS, "remainders")
171 VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
172 case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
173 case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
174 case ByteCodeInstruction::kScalarToMatrix: {
175 int cols = READ8();
176 int rows = READ8();
177 printf("scalartomatrix %dx%d", cols, rows);
178 break;
179 }
180 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
181 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
182 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
183 VECTOR_DISASSEMBLE(kSin, "sin")
Brian Osmanab8f3842020-04-07 09:30:44 -0400184 VECTOR_DISASSEMBLE(kSqrt, "sqrt")
Brian Osmanb08cc022020-04-02 11:38:40 -0400185 case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
186 case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
187 case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
188 case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
189 case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
190 case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
191 case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
192 case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
193 case ByteCodeInstruction::kStoreSwizzle: {
194 int target = READ8();
195 int count = READ8();
196 printf("storeswizzle %d %d", target, count);
197 for (int i = 0; i < count; ++i) {
198 printf(", %d", READ8());
199 }
200 break;
201 }
202 case ByteCodeInstruction::kStoreSwizzleGlobal: {
203 int target = READ8();
204 int count = READ8();
205 printf("storeswizzleglobal %d %d", target, count);
206 for (int i = 0; i < count; ++i) {
207 printf(", %d", READ8());
208 }
209 break;
210 }
211 case ByteCodeInstruction::kStoreSwizzleIndirect: {
212 int count = READ8();
213 printf("storeswizzleindirect %d", count);
214 for (int i = 0; i < count; ++i) {
215 printf(", %d", READ8());
216 }
217 break;
218 }
219 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
220 int count = READ8();
221 printf("storeswizzleindirectglobal %d", count);
222 for (int i = 0; i < count; ++i) {
223 printf(", %d", READ8());
224 }
225 break;
226 }
227 case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
228 case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
229 break;
230 VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
231 VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
232 case ByteCodeInstruction::kSwizzle: {
233 printf("swizzle %d, ", READ8());
234 int count = READ8();
235 printf("%d", count);
236 for (int i = 0; i < count; ++i) {
237 printf(", %d", READ8());
238 }
239 break;
240 }
241 VECTOR_DISASSEMBLE(kTan, "tan")
Brian Osmanab8f3842020-04-07 09:30:44 -0400242 case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
243 case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
244 case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
245 case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
Brian Osmanb08cc022020-04-02 11:38:40 -0400246 case ByteCodeInstruction::kXorB: printf("xorb"); break;
247 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
248 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
249 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
250 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
251 case ByteCodeInstruction::kBranchIfAllFalse:
252 printf("branchifallfalse %d", READ16());
253 break;
254 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
255 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
256 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
257 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
258 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
259 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
260 default:
Brian Osmanab8f3842020-04-07 09:30:44 -0400261 ip -= sizeof(ByteCodeInstruction);
Brian Osmanb08cc022020-04-02 11:38:40 -0400262 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
263 SkASSERT(false);
264 }
265 return ip;
266}
267
// Expands to the switch cases for the 4/3/2/1-component forms of a component-wise binary
// operator. Expects `inst` (the decoded opcode), `sp` (top-of-stack pointer) and POP() to be
// in scope, and must be used inside a loop because the final case ends with `continue`.
//
// Each case combines one component pair, pops one slot, and falls through to the next
// narrower case. `count` is the stack offset of the left-hand operand for that step.
// NOTE(review): `count` presumably relies on the base##4 .. base opcodes being declared
// consecutively so that (inst - base - 1) is a negative offset -- confirm against the
// ByteCodeInstruction declaration.
#define VECTOR_BINARY_OP(base, field, op)                             \
    case ByteCodeInstruction::base ## 4:                              \
        sp[-4] = sp[-4].field op sp[0].field;                         \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = sp[count].field op sp[0].field;                   \
        POP();                                                        \
        continue;                                                     \
    }
289
// A naive implementation of / or % using skvx operations will likely crash with a divide by
// zero in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
//
// Same case structure as VECTOR_BINARY_OP, but the operator is applied one lane at a time
// (as `op=`) and only in lanes where mask() is non-zero; other lanes keep their left-hand
// value. Expects `inst`, `sp`, POP() and a callable mask() in scope, inside a loop.
#define VECTOR_BINARY_MASKED_OP(base, field, op)                      \
    case ByteCodeInstruction::base ## 4:                              \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[-4].field[i] op ## = sp[0].field[i];               \
            }                                                         \
        }                                                             \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        for (int i = 0; i < VecWidth; ++i) {                          \
            if (mask()[i]) {                                          \
                sp[count].field[i] op ## = sp[0].field[i];            \
            }                                                         \
        }                                                             \
        POP();                                                        \
        continue;                                                     \
    }
329
330
// VECTOR_BINARY_OP plus the `N` form, whose component count is a one-byte operand read from
// `ip`. The N case applies the operator `count` times; each step writes sp[-count] and pops
// one slot, so the right-hand operand is consumed and the result is left in its place.
#define VECTOR_MATRIX_BINARY_OP(base, field, op)                      \
    VECTOR_BINARY_OP(base, field, op)                                 \
    case ByteCodeInstruction::base ## N: {                            \
        int count = READ8();                                          \
        for (int i = count; i > 0; --i) {                             \
            sp[-count] = sp[-count].field op sp[0].field;             \
            POP();                                                    \
        }                                                             \
        continue;                                                     \
    }
341
// Function-call counterpart of VECTOR_BINARY_OP: each step stores
// fn(left.field, right.field) into the left-hand slot and pops the right-hand one.
// Expects `inst`, `sp` and POP() in scope, inside a loop.
#define VECTOR_BINARY_FN(base, field, fn)                             \
    case ByteCodeInstruction::base ## 4:                              \
        sp[-4] = fn(sp[-4].field, sp[0].field);                       \
        POP();                                                        \
        /* fall through */                                            \
    case ByteCodeInstruction::base ## 3: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base ## 2: {                            \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
    }   /* fall through */                                            \
    case ByteCodeInstruction::base: {                                 \
        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
        sp[count] = fn(sp[count].field, sp[0].field);                 \
        POP();                                                        \
        continue;                                                     \
    }
363
// Expands to the switch cases that apply `fn` in place to the top 4/3/2/1 stack slots.
// The wider cases deliberately fall through to the narrower ones; the stack depth does not
// change. Expects `sp` in scope, inside a loop (the expansion ends with `continue`).
// Note the argument order is (base, fn, field), unlike the binary macros.
#define VECTOR_UNARY_FN(base, fn, field)                                   \
    case ByteCodeInstruction::base ## 4: sp[-3] = fn(sp[-3].field);        \
        /* fall through */                                                 \
    case ByteCodeInstruction::base ## 3: sp[-2] = fn(sp[-2].field);        \
        /* fall through */                                                 \
    case ByteCodeInstruction::base ## 2: sp[-1] = fn(sp[-1].field);        \
        /* fall through */                                                 \
    case ByteCodeInstruction::base:      sp[ 0] = fn(sp[ 0].field);        \
                      continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400370
Brian Osmanb08cc022020-04-02 11:38:40 -0400371union VValue {
372 VValue() {}
373 VValue(F32 f) : fFloat(f) {}
374 VValue(I32 s) : fSigned(s) {}
375 VValue(U32 u) : fUnsigned(u) {}
376
377 F32 fFloat;
378 I32 fSigned;
379 U32 fUnsigned;
380};
381
382struct StackFrame {
383 const uint8_t* fCode;
384 const uint8_t* fIP;
385 VValue* fStack;
386 int fParameterCount;
387};
388
389static F32 VecMod(F32 a, F32 b) {
390 return a - skvx::trunc(a / b) * b;
391}
392
// Shorthand for the float view of the stack slot at offset `index` from `sp`.
#define spf(index)  sp[index].fFloat
394
395static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
396 int baseIndex, I32 mask) {
397 int argumentCount = READ8();
398 int returnCount = READ8();
399 int target = READ8();
400 ExternalValue* v = byteCode->fExternalValues[target];
401 sp -= argumentCount - 1;
402
403 float tmpArgs[4];
404 float tmpReturn[4];
405 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
406 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
407
408 for (int i = 0; i < VecWidth; ++i) {
409 if (mask[i]) {
410 for (int j = 0; j < argumentCount; ++j) {
411 tmpArgs[j] = sp[j].fFloat[i];
412 }
413 v->call(baseIndex + i, tmpArgs, tmpReturn);
414 for (int j = 0; j < returnCount; ++j) {
415 sp[j].fFloat[i] = tmpReturn[j];
416 }
417 }
418 }
419 sp += returnCount - 1;
420}
421
422static void Inverse2x2(VValue* sp) {
423 F32 a = sp[-3].fFloat,
424 b = sp[-2].fFloat,
425 c = sp[-1].fFloat,
426 d = sp[ 0].fFloat;
427 F32 idet = F32(1) / (a*d - b*c);
428 sp[-3].fFloat = d * idet;
429 sp[-2].fFloat = -b * idet;
430 sp[-1].fFloat = -c * idet;
431 sp[ 0].fFloat = a * idet;
432}
433
434static void Inverse3x3(VValue* sp) {
435 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
436 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
437 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
438 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
439 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
440 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
441 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
442 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
443 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
444 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
445 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
446 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
447 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
448 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
449}
450
451static void Inverse4x4(VValue* sp) {
452 F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
453 a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
454 a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
455 a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);
456
457 F32 b00 = a00 * a11 - a01 * a10,
458 b01 = a00 * a12 - a02 * a10,
459 b02 = a00 * a13 - a03 * a10,
460 b03 = a01 * a12 - a02 * a11,
461 b04 = a01 * a13 - a03 * a11,
462 b05 = a02 * a13 - a03 * a12,
463 b06 = a20 * a31 - a21 * a30,
464 b07 = a20 * a32 - a22 * a30,
465 b08 = a20 * a33 - a23 * a30,
466 b09 = a21 * a32 - a22 * a31,
467 b10 = a21 * a33 - a23 * a31,
468 b11 = a22 * a33 - a23 * a32;
469
470 F32 idet = F32(1) /
471 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
472
473 b00 *= idet;
474 b01 *= idet;
475 b02 *= idet;
476 b03 *= idet;
477 b04 *= idet;
478 b05 *= idet;
479 b06 *= idet;
480 b07 *= idet;
481 b08 *= idet;
482 b09 *= idet;
483 b10 *= idet;
484 b11 *= idet;
485
486 spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
487 spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
488 spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
489 spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
490 spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
491 spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
492 spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
493 spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
494 spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
495 spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
496 spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
497 spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
498 spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
499 spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
500 spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
501 spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
502}
503
504static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
505 float* outReturn[], VValue globals[], const float uniforms[],
506 bool stripedOutput, int N, int baseIndex) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400507 // Needs to be the first N non-negative integers, at least as large as VecWidth
508 static const Interpreter::I32 gLanes = {
509 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
510 };
511
512 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
513
514 #define POP() (*(sp--))
515 #define PUSH(v) (sp[1] = v, ++sp)
516
517 const uint8_t* code = f->fCode.data();
518 const uint8_t* ip = code;
519 std::vector<StackFrame> frames;
520
521 I32 condStack[16]; // Independent condition masks
522 I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
523 I32 contStack[16]; // Continue flags for loops
524 I32 loopStack[16]; // Loop execution masks
525 condStack[0] = maskStack[0] = (gLanes < N);
526 contStack[0] = I32( 0);
527 loopStack[0] = I32(~0);
528 I32* condPtr = condStack;
529 I32* maskPtr = maskStack;
530 I32* contPtr = contStack;
531 I32* loopPtr = loopStack;
532
533 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
534 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
535 return false;
536 }
537
538 auto mask = [&]() { return *maskPtr & *loopPtr; };
539
Brian Osmanb08cc022020-04-02 11:38:40 -0400540 for (;;) {
Brian Osmanab8f3842020-04-07 09:30:44 -0400541#ifdef TRACE
542 printf("at %3d ", (int) (ip - code));
543 disassemble_instruction(ip);
544 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
Brian Osmanb08cc022020-04-02 11:38:40 -0400545#endif
Brian Osmanab8f3842020-04-07 09:30:44 -0400546 ByteCodeInstruction inst = READ_INST();
547 switch (inst) {
Brian Osmanb08cc022020-04-02 11:38:40 -0400548
Brian Osmanab8f3842020-04-07 09:30:44 -0400549 VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
550 VECTOR_BINARY_OP(kAddI, fSigned, +)
Brian Osmanb08cc022020-04-02 11:38:40 -0400551
Brian Osmanab8f3842020-04-07 09:30:44 -0400552 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
553 case ByteCodeInstruction::kAndB:
554 sp[-1] = sp[-1].fSigned & sp[0].fSigned;
555 POP();
556 continue;
557 case ByteCodeInstruction::kNotB:
558 sp[0] = ~sp[0].fSigned;
559 continue;
560 case ByteCodeInstruction::kOrB:
561 sp[-1] = sp[-1].fSigned | sp[0].fSigned;
562 POP();
563 continue;
564 case ByteCodeInstruction::kXorB:
565 sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
566 POP();
567 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400568
Brian Osmanab8f3842020-04-07 09:30:44 -0400569 case ByteCodeInstruction::kBranch:
570 ip = code + READ16();
571 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400572
Brian Osmanab8f3842020-04-07 09:30:44 -0400573 case ByteCodeInstruction::kCall: {
574 // Precursor code reserved space for the return value, and pushed all parameters to
575 // the stack. Update our bottom of stack to point at the first parameter, and our
576 // sp to point past those parameters (plus space for locals).
577 int target = READ8();
Mike Klein01d42b12020-04-14 15:34:53 -0500578 const ByteCodeFunction* f = byteCode->fFunctions[target].get();
Brian Osmanab8f3842020-04-07 09:30:44 -0400579 if (skvx::any(mask())) {
Mike Klein01d42b12020-04-14 15:34:53 -0500580 frames.push_back({ code, ip, stack, f->fParameterCount });
581 ip = code = f->fCode.data();
582 stack = sp - f->fParameterCount + 1;
583 sp = stack + f->fParameterCount + f->fLocalCount - 1;
584 // As we did in runStriped(), zero locals so they're safe to mask-store into.
585 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
586 stack[i].fFloat = 0.0f;
587 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400588 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400589 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400590 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400591
Brian Osmanab8f3842020-04-07 09:30:44 -0400592 case ByteCodeInstruction::kCallExternal: {
593 CallExternal(byteCode, ip, sp, baseIndex, mask());
594 continue;
595 }
596
597 case ByteCodeInstruction::kClampIndex: {
598 int length = READ8();
599 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
600 return false;
Brian Osmanb08cc022020-04-02 11:38:40 -0400601 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400602 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400603 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400604
Brian Osmanab8f3842020-04-07 09:30:44 -0400605 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
606 VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
607 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
608 VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
609 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
610 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
611 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
612 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
613 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
614 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
615 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
616 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
617 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
618 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
619 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
620 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
621
622 case ByteCodeInstruction::kConvertFtoI4: sp[-3] = skvx::cast<int>(sp[-3].fFloat);
623 case ByteCodeInstruction::kConvertFtoI3: sp[-2] = skvx::cast<int>(sp[-2].fFloat);
624 case ByteCodeInstruction::kConvertFtoI2: sp[-1] = skvx::cast<int>(sp[-1].fFloat);
625 case ByteCodeInstruction::kConvertFtoI: sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
626 continue;
627
628 case ByteCodeInstruction::kConvertStoF4: sp[-3] = skvx::cast<float>(sp[-3].fSigned);
629 case ByteCodeInstruction::kConvertStoF3: sp[-2] = skvx::cast<float>(sp[-2].fSigned);
630 case ByteCodeInstruction::kConvertStoF2: sp[-1] = skvx::cast<float>(sp[-1].fSigned);
631 case ByteCodeInstruction::kConvertStoF: sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
632 continue;
633
634 case ByteCodeInstruction::kConvertUtoF4: sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
635 case ByteCodeInstruction::kConvertUtoF3: sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
636 case ByteCodeInstruction::kConvertUtoF2: sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
637 case ByteCodeInstruction::kConvertUtoF: sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
638 continue;
639
Mike Kleinc2160252020-04-29 09:56:56 -0500640 VECTOR_UNARY_FN(kCos, skvx::cos, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -0400641
642 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
643 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
644 VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
645
646 case ByteCodeInstruction::kDup4: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
647 case ByteCodeInstruction::kDup3: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
648 case ByteCodeInstruction::kDup2: PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
649 case ByteCodeInstruction::kDup : PUSH(sp[(int)ByteCodeInstruction::kDup - (int)inst]);
650 continue;
651
652 case ByteCodeInstruction::kDupN: {
653 int count = READ8();
654 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
655 sp += count;
656 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400657 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400658
Mike Reed8520e762020-04-30 12:06:23 -0400659 VECTOR_UNARY_FN(kFract, skvx::fract, fFloat)
660
Brian Osmanab8f3842020-04-07 09:30:44 -0400661 case ByteCodeInstruction::kInverse2x2:
662 Inverse2x2(sp);
663 continue;
664 case ByteCodeInstruction::kInverse3x3:
665 Inverse3x3(sp);
666 continue;
667 case ByteCodeInstruction::kInverse4x4:
668 Inverse4x4(sp);
669 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400670
Brian Osmanab8f3842020-04-07 09:30:44 -0400671 case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3];
672 case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2];
673 case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1];
674 case ByteCodeInstruction::kLoad: sp[1] = stack[*ip + 0];
675 ++ip;
676 sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
677 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400678
Brian Osmanab8f3842020-04-07 09:30:44 -0400679 case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3];
680 case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2];
681 case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1];
682 case ByteCodeInstruction::kLoadGlobal: sp[1] = globals[*ip + 0];
683 ++ip;
684 sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
685 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400686
Brian Osmanab8f3842020-04-07 09:30:44 -0400687 case ByteCodeInstruction::kLoadUniform4: sp[4].fFloat = uniforms[*ip + 3];
688 case ByteCodeInstruction::kLoadUniform3: sp[3].fFloat = uniforms[*ip + 2];
689 case ByteCodeInstruction::kLoadUniform2: sp[2].fFloat = uniforms[*ip + 1];
690 case ByteCodeInstruction::kLoadUniform: sp[1].fFloat = uniforms[*ip + 0];
691 ++ip;
692 sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
693 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400694
Brian Osmanab8f3842020-04-07 09:30:44 -0400695 case ByteCodeInstruction::kLoadExtended: {
696 int count = READ8();
697 I32 src = POP().fSigned;
698 I32 m = mask();
699 for (int i = 0; i < count; ++i) {
700 for (int j = 0; j < VecWidth; ++j) {
701 if (m[j]) {
702 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
Brian Osmanb08cc022020-04-02 11:38:40 -0400703 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400704 }
705 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400706 sp += count;
707 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400708 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400709
Brian Osmanab8f3842020-04-07 09:30:44 -0400710 case ByteCodeInstruction::kLoadExtendedGlobal: {
711 int count = READ8();
712 I32 src = POP().fSigned;
713 I32 m = mask();
714 for (int i = 0; i < count; ++i) {
715 for (int j = 0; j < VecWidth; ++j) {
716 if (m[j]) {
717 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
718 }
719 }
720 }
721 sp += count;
722 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400723 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400724
Brian Osmanab8f3842020-04-07 09:30:44 -0400725 case ByteCodeInstruction::kLoadExtendedUniform: {
726 int count = READ8();
727 I32 src = POP().fSigned;
728 I32 m = mask();
729 for (int i = 0; i < count; ++i) {
730 for (int j = 0; j < VecWidth; ++j) {
731 if (m[j]) {
732 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
733 }
734 }
735 }
736 sp += count;
737 continue;
738 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400739
Brian Osmanab8f3842020-04-07 09:30:44 -0400740 case ByteCodeInstruction::kLoadSwizzle: {
741 int src = READ8();
742 int count = READ8();
743 for (int i = 0; i < count; ++i) {
744 PUSH(stack[src + *(ip + i)]);
745 }
746 ip += count;
747 continue;
748 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400749
Brian Osmanab8f3842020-04-07 09:30:44 -0400750 case ByteCodeInstruction::kLoadSwizzleGlobal: {
751 int src = READ8();
752 int count = READ8();
753 for (int i = 0; i < count; ++i) {
754 PUSH(globals[src + *(ip + i)]);
755 }
756 ip += count;
757 continue;
758 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400759
Brian Osmanab8f3842020-04-07 09:30:44 -0400760 case ByteCodeInstruction::kLoadSwizzleUniform: {
761 int src = READ8();
762 int count = READ8();
763 for (int i = 0; i < count; ++i) {
764 PUSH(F32(uniforms[src + *(ip + i)]));
765 }
766 ip += count;
767 continue;
768 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400769
Brian Osmanab8f3842020-04-07 09:30:44 -0400770 case ByteCodeInstruction::kMatrixToMatrix: {
771 int srcCols = READ8();
772 int srcRows = READ8();
773 int dstCols = READ8();
774 int dstRows = READ8();
775 SkASSERT(srcCols >= 2 && srcCols <= 4);
776 SkASSERT(srcRows >= 2 && srcRows <= 4);
777 SkASSERT(dstCols >= 2 && dstCols <= 4);
778 SkASSERT(dstRows >= 2 && dstRows <= 4);
779 F32 tmp[16];
780 memset(tmp, 0, sizeof(tmp));
781 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
782 for (int c = srcCols - 1; c >= 0; --c) {
783 for (int r = srcRows - 1; r >= 0; --r) {
784 tmp[c*4 + r] = POP().fFloat;
785 }
786 }
787 for (int c = 0; c < dstCols; ++c) {
788 for (int r = 0; r < dstRows; ++r) {
789 PUSH(tmp[c*4 + r]);
790 }
791 }
792 continue;
793 }
794
795 case ByteCodeInstruction::kMatrixMultiply: {
796 int lCols = READ8();
797 int lRows = READ8();
798 int rCols = READ8();
799 int rRows = lCols;
800 F32 tmp[16] = { 0.0f };
801 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
802 F32* A = B - (lCols * lRows);
803 for (int c = 0; c < rCols; ++c) {
804 for (int r = 0; r < lRows; ++r) {
805 for (int j = 0; j < lCols; ++j) {
806 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
807 }
808 }
809 }
810 sp -= (lCols * lRows) + (rCols * rRows);
811 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
812 sp += (rCols * lRows);
813 continue;
814 }
815
816 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
817 VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
818
819 case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat;
820 case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat;
821 case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat;
822 case ByteCodeInstruction::kNegateF: sp[ 0] = -sp[ 0].fFloat;
823 continue;
824
825 case ByteCodeInstruction::kNegateFN: {
826 int count = READ8();
827 for (int i = count - 1; i >= 0; --i) {
828 sp[-i] = -sp[-i].fFloat;
829 }
830 continue;
831 }
832
833 case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned;
834 case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned;
835 case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned;
836 case ByteCodeInstruction::kNegateI: sp[ 0] = -sp[ 0].fSigned;
837 continue;
838
839 case ByteCodeInstruction::kPop4: POP();
840 case ByteCodeInstruction::kPop3: POP();
841 case ByteCodeInstruction::kPop2: POP();
842 case ByteCodeInstruction::kPop: POP();
843 continue;
844
845 case ByteCodeInstruction::kPopN:
846 sp -= READ8();
847 continue;
848
849 case ByteCodeInstruction::kPushImmediate:
850 PUSH(U32(READ32()));
851 continue;
852
853 case ByteCodeInstruction::kReadExternal:
854 case ByteCodeInstruction::kReadExternal2:
855 case ByteCodeInstruction::kReadExternal3:
856 case ByteCodeInstruction::kReadExternal4: {
857 int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
858 int src = READ8();
859 float tmp[4];
860 I32 m = mask();
861 for (int i = 0; i < VecWidth; ++i) {
862 if (m[i]) {
863 byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
864 for (int j = 0; j < count; ++j) {
865 sp[j + 1].fFloat[i] = tmp[j];
866 }
867 }
868 }
869 sp += count;
870 continue;
871 }
872
873 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
874 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
875 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
876
877 case ByteCodeInstruction::kReserve:
878 sp += READ8();
879 continue;
880
881 case ByteCodeInstruction::kReturn: {
882 int count = READ8();
883 if (frames.empty()) {
884 if (outReturn) {
885 VValue* src = sp - count + 1;
886 if (stripedOutput) {
887 for (int i = 0; i < count; ++i) {
888 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
889 ++src;
890 }
891 } else {
892 float* outPtr = outReturn[0];
893 for (int i = 0; i < count; ++i) {
894 for (int j = 0; j < N; ++j) {
895 outPtr[count * j] = src->fFloat[j];
896 }
897 ++outPtr;
898 ++src;
899 }
900 }
901 }
902 return true;
903 } else {
904 // When we were called, the caller reserved stack space for their copy of our
905 // return value, then 'stack' was positioned after that, where our parameters
906 // were placed. Copy our return values to their reserved area.
907 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
908
909 // Now move the stack pointer to the end of the passed-in parameters. This odd
910 // calling convention requires the caller to pop the arguments after calling,
911 // but allows them to store any out-parameters back during that unwinding.
912 // After that sequence finishes, the return value will be the top of the stack.
913 const StackFrame& frame(frames.back());
914 sp = stack + frame.fParameterCount - 1;
915 stack = frame.fStack;
916 code = frame.fCode;
917 ip = frame.fIP;
918 frames.pop_back();
919 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -0400920 }
921 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400922
923 case ByteCodeInstruction::kScalarToMatrix: {
924 int cols = READ8();
925 int rows = READ8();
926 VValue v = POP();
927 for (int c = 0; c < cols; ++c) {
928 for (int r = 0; r < rows; ++r) {
929 PUSH(c == r ? v : F32(0.0f));
930 }
Brian Osmanb08cc022020-04-02 11:38:40 -0400931 }
Brian Osmanab8f3842020-04-07 09:30:44 -0400932 continue;
933 }
934
935 case ByteCodeInstruction::kShiftLeft:
936 sp[0] = sp[0].fSigned << READ8();
937 continue;
938 case ByteCodeInstruction::kShiftRightS:
939 sp[0] = sp[0].fSigned >> READ8();
940 continue;
941 case ByteCodeInstruction::kShiftRightU:
942 sp[0] = sp[0].fUnsigned >> READ8();
943 continue;
944
Mike Kleinc2160252020-04-29 09:56:56 -0500945 VECTOR_UNARY_FN(kSin, skvx::sin, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -0400946 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
947
948 case ByteCodeInstruction::kStore4:
949 stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
950 case ByteCodeInstruction::kStore3:
951 stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
952 case ByteCodeInstruction::kStore2:
953 stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
954 case ByteCodeInstruction::kStore:
955 stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
956 ++ip;
957 continue;
958
959 case ByteCodeInstruction::kStoreGlobal4:
960 globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
961 case ByteCodeInstruction::kStoreGlobal3:
962 globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
963 case ByteCodeInstruction::kStoreGlobal2:
964 globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
965 case ByteCodeInstruction::kStoreGlobal:
966 globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
967 ++ip;
968 continue;
969
970 case ByteCodeInstruction::kStoreExtended: {
971 int count = READ8();
972 I32 target = POP().fSigned;
973 VValue* src = sp - count + 1;
974 I32 m = mask();
975 for (int i = 0; i < count; ++i) {
976 for (int j = 0; j < VecWidth; ++j) {
977 if (m[j]) {
978 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
979 }
980 }
981 }
982 sp -= count;
983 continue;
984 }
985 case ByteCodeInstruction::kStoreExtendedGlobal: {
986 int count = READ8();
987 I32 target = POP().fSigned;
988 VValue* src = sp - count + 1;
989 I32 m = mask();
990 for (int i = 0; i < count; ++i) {
991 for (int j = 0; j < VecWidth; ++j) {
992 if (m[j]) {
993 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
994 }
995 }
996 }
997 sp -= count;
998 continue;
999 }
1000
1001 case ByteCodeInstruction::kStoreSwizzle: {
1002 int target = READ8();
1003 int count = READ8();
1004 for (int i = count - 1; i >= 0; --i) {
1005 stack[target + *(ip + i)] = skvx::if_then_else(
1006 mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
1007 }
1008 ip += count;
1009 continue;
1010 }
1011
1012 case ByteCodeInstruction::kStoreSwizzleGlobal: {
1013 int target = READ8();
1014 int count = READ8();
1015 for (int i = count - 1; i >= 0; --i) {
1016 globals[target + *(ip + i)] = skvx::if_then_else(
1017 mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
1018 }
1019 ip += count;
1020 continue;
1021 }
1022
1023 case ByteCodeInstruction::kStoreSwizzleIndirect: {
1024 int count = READ8();
1025 I32 target = POP().fSigned;
1026 I32 m = mask();
1027 for (int i = count - 1; i >= 0; --i) {
1028 I32 v = POP().fSigned;
1029 for (int j = 0; j < VecWidth; ++j) {
1030 if (m[j]) {
1031 stack[target[j] + *(ip + i)].fSigned[j] = v[j];
1032 }
1033 }
1034 }
1035 ip += count;
1036 continue;
1037 }
1038
1039 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
1040 int count = READ8();
1041 I32 target = POP().fSigned;
1042 I32 m = mask();
1043 for (int i = count - 1; i >= 0; --i) {
1044 I32 v = POP().fSigned;
1045 for (int j = 0; j < VecWidth; ++j) {
1046 if (m[j]) {
1047 globals[target[j] + *(ip + i)].fSigned[j] = v[j];
1048 }
1049 }
1050 }
1051 ip += count;
1052 continue;
1053 }
1054
1055 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
1056 VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
1057
1058 case ByteCodeInstruction::kSwizzle: {
1059 VValue tmp[4];
1060 for (int i = READ8() - 1; i >= 0; --i) {
1061 tmp[i] = POP();
1062 }
1063 for (int i = READ8() - 1; i >= 0; --i) {
1064 PUSH(tmp[READ8()]);
1065 }
1066 continue;
1067 }
1068
Mike Reed8520e762020-04-30 12:06:23 -04001069 VECTOR_UNARY_FN(kATan, skvx::atan, fFloat)
Mike Kleinc2160252020-04-29 09:56:56 -05001070 VECTOR_UNARY_FN(kTan, skvx::tan, fFloat)
Brian Osmanab8f3842020-04-07 09:30:44 -04001071
1072 case ByteCodeInstruction::kWriteExternal4:
1073 case ByteCodeInstruction::kWriteExternal3:
1074 case ByteCodeInstruction::kWriteExternal2:
1075 case ByteCodeInstruction::kWriteExternal: {
1076 int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
1077 int target = READ8();
1078 float tmp[4];
1079 I32 m = mask();
1080 sp -= count;
1081 for (int i = 0; i < VecWidth; ++i) {
1082 if (m[i]) {
1083 for (int j = 0; j < count; ++j) {
1084 tmp[j] = sp[j + 1].fFloat[i];
1085 }
1086 byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
1087 }
1088 }
1089 continue;
1090 }
1091
1092 case ByteCodeInstruction::kMaskPush:
1093 condPtr[1] = POP().fSigned;
1094 maskPtr[1] = maskPtr[0] & condPtr[1];
1095 ++condPtr; ++maskPtr;
1096 continue;
1097 case ByteCodeInstruction::kMaskPop:
1098 --condPtr; --maskPtr;
1099 continue;
1100 case ByteCodeInstruction::kMaskNegate:
1101 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
1102 continue;
1103 case ByteCodeInstruction::kMaskBlend: {
1104 int count = READ8();
1105 I32 m = condPtr[0];
1106 --condPtr; --maskPtr;
1107 for (int i = 0; i < count; ++i) {
1108 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
1109 --sp;
1110 }
1111 continue;
1112 }
1113 case ByteCodeInstruction::kBranchIfAllFalse: {
1114 int target = READ16();
1115 if (!skvx::any(mask())) {
1116 ip = code + target;
1117 }
1118 continue;
1119 }
1120
1121 case ByteCodeInstruction::kLoopBegin:
1122 contPtr[1] = 0;
1123 loopPtr[1] = loopPtr[0];
1124 ++contPtr; ++loopPtr;
1125 continue;
1126 case ByteCodeInstruction::kLoopNext:
1127 *loopPtr |= *contPtr;
1128 *contPtr = 0;
1129 continue;
1130 case ByteCodeInstruction::kLoopMask:
1131 *loopPtr &= POP().fSigned;
1132 continue;
1133 case ByteCodeInstruction::kLoopEnd:
1134 --contPtr; --loopPtr;
1135 continue;
1136 case ByteCodeInstruction::kLoopBreak:
1137 *loopPtr &= ~mask();
1138 continue;
1139 case ByteCodeInstruction::kLoopContinue: {
1140 I32 m = mask();
1141 *contPtr |= m;
1142 *loopPtr &= ~m;
1143 continue;
Brian Osmanb08cc022020-04-02 11:38:40 -04001144 }
1145 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001146 }
Brian Osmanb08cc022020-04-02 11:38:40 -04001147}
1148
1149}; // class Interpreter
1150
1151#endif // SK_ENABLE_SKSL_INTERPRETER
1152
1153#undef spf
1154
1155void ByteCodeFunction::disassemble() const {
1156#if defined(SK_ENABLE_SKSL_INTERPRETER)
1157 const uint8_t* ip = fCode.data();
1158 while (ip < fCode.data() + fCode.size()) {
1159 printf("%d: ", (int)(ip - fCode.data()));
1160 ip = Interpreter::DisassembleInstruction(ip);
1161 printf("\n");
1162 }
1163#endif
1164}
1165
Brian Osmanb08cc022020-04-02 11:38:40 -04001166bool ByteCode::run(const ByteCodeFunction* f,
1167 float* args, int argCount,
1168 float* outReturn, int returnCount,
1169 const float* uniforms, int uniformCount) const {
1170#if defined(SK_ENABLE_SKSL_INTERPRETER)
1171 Interpreter::VValue stack[128];
1172 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1173 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1174 return false;
1175 }
1176
1177 if (argCount != f->fParameterCount ||
1178 returnCount != f->fReturnCount ||
1179 uniformCount != fUniformSlotCount) {
1180 return false;
1181 }
1182
1183 Interpreter::VValue globals[32];
1184 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1185 return false;
1186 }
1187
1188 // Transpose args into stack
1189 {
1190 float* src = args;
1191 float* dst = (float*)stack;
1192 for (int i = 0; i < argCount; ++i) {
1193 *dst = *src++;
1194 dst += VecWidth;
1195 }
1196 }
1197
1198 bool stripedOutput = false;
1199 float** outArray = outReturn ? &outReturn : nullptr;
1200 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
1201 return false;
1202 }
1203
1204 // Transpose out parameters back
1205 {
1206 float* dst = args;
1207 float* src = (float*)stack;
1208 for (const auto& p : f->fParameters) {
1209 if (p.fIsOutParameter) {
1210 for (int i = p.fSlotCount; i > 0; --i) {
1211 *dst++ = *src;
1212 src += VecWidth;
1213 }
1214 } else {
1215 dst += p.fSlotCount;
1216 src += p.fSlotCount * VecWidth;
1217 }
1218 }
1219 }
1220
1221 return true;
1222#else
1223 SkDEBUGFAIL("ByteCode interpreter not enabled");
1224 return false;
1225#endif
1226}
1227
1228bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
1229 float* args[], int argCount,
1230 float* outReturn[], int returnCount,
1231 const float* uniforms, int uniformCount) const {
1232#if defined(SK_ENABLE_SKSL_INTERPRETER)
1233 Interpreter::VValue stack[128];
1234 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1235 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1236 return false;
1237 }
1238
1239 if (argCount != f->fParameterCount ||
1240 returnCount != f->fReturnCount ||
1241 uniformCount != fUniformSlotCount) {
1242 return false;
1243 }
1244
1245 Interpreter::VValue globals[32];
1246 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1247 return false;
1248 }
1249
1250 // innerRun just takes outArgs, so clear it if the count is zero
1251 if (returnCount == 0) {
1252 outReturn = nullptr;
1253 }
1254
Mike Klein01d42b12020-04-14 15:34:53 -05001255 // The instructions to store to locals and globals mask in the original value,
1256 // so they technically need to be initialized (to any value).
1257 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
1258 stack[i].fFloat = 0.0f;
1259 }
1260 for (int i = 0; i < fGlobalSlotCount; i++) {
1261 globals[i].fFloat = 0.0f;
1262 }
1263
Brian Osmanb08cc022020-04-02 11:38:40 -04001264 int baseIndex = 0;
1265
1266 while (N) {
1267 int w = std::min(N, VecWidth);
1268
1269 // Copy args into stack
1270 for (int i = 0; i < argCount; ++i) {
1271 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1272 }
1273
1274 bool stripedOutput = true;
1275 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1276 baseIndex)) {
1277 return false;
1278 }
1279
1280 // Copy out parameters back
1281 int slot = 0;
1282 for (const auto& p : f->fParameters) {
1283 if (p.fIsOutParameter) {
1284 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1285 memcpy(args[i], stack + i, w * sizeof(float));
1286 }
1287 }
1288 slot += p.fSlotCount;
1289 }
1290
1291 // Step each argument pointer ahead
1292 for (int i = 0; i < argCount; ++i) {
1293 args[i] += w;
1294 }
1295 N -= w;
1296 baseIndex += w;
1297 }
1298
1299 return true;
1300#else
1301 SkDEBUGFAIL("ByteCode interpreter not enabled");
1302 return false;
1303#endif
1304}
1305
1306} // namespace SkSL
1307
1308#endif