SkSL ByteCode: Remove specialized instructions for N up to 4
Nearly all instructions have one form, with a count byte after the
instruction. Simplifies the SkVM conversion logic, reduces code size.
Change-Id: I5ff7bb2991a09198c5c8f5bcaf2c1017c06be5d4
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/299682
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/core/SkRuntimeEffect.cpp b/src/core/SkRuntimeEffect.cpp
index 87d1501..008ae7a 100644
--- a/src/core/SkRuntimeEffect.cpp
+++ b/src/core/SkRuntimeEffect.cpp
@@ -461,33 +461,36 @@
//auto u16 = [&]{ auto x = sk_unaligned_load<uint16_t>(ip); ip += sizeof(x); return x; };
auto u32 = [&]{ auto x = sk_unaligned_load<uint32_t>(ip); ip += sizeof(x); return x; };
- auto unary = [&](Inst base, auto&& fn, bool allow_big = false) {
- int N = (int)base - (int)inst + 1;
- SkASSERT(0 < N && N <= (allow_big ? 5 : 4));
- if (N == 5) { N = u8(); }
- std::vector<skvm::F32> args(N);
- for (int i = 0; i < N; ++i) {
- args[i] = pop();
- }
- for (int i = N; i --> 0;) {
- push(fn(args[i]));
+ auto unary = [&](auto&& fn) {
+ int N = u8();
+ std::vector<skvm::F32> a(N);
+ for (int i = N; i --> 0; ) { a[i] = pop(); }
+
+ for (int i = 0; i < N; i++) {
+ push(fn(a[i]));
}
};
- auto binary = [&](Inst base, auto&& fn, bool allow_big = false) {
- int N = (int)base - (int)inst + 1;
- SkASSERT(0 < N && N <= (allow_big ? 5 : 4));
- if (N == 5) { N = u8(); }
- std::vector<skvm::F32> right(N);
- for (int i = 0; i < N; ++i) {
- right[i] = pop();
+ auto binary = [&](auto&& fn) {
+ int N = u8();
+ std::vector<skvm::F32> a(N), b(N);
+ for (int i = N; i --> 0; ) { b[i] = pop(); }
+ for (int i = N; i --> 0; ) { a[i] = pop(); }
+
+ for (int i = 0; i < N; i++) {
+ push(fn(a[i], b[i]));
}
- std::vector<skvm::F32> left(N);
- for (int i = 0; i < N; ++i) {
- left[i] = pop();
- }
- for (int i = N; i --> 0;) {
- push(fn(left[i], right[i]));
+ };
+
+ auto ternary = [&](auto&& fn) {
+ int N = u8();
+ std::vector<skvm::F32> a(N), b(N), c(N);
+ for (int i = N; i --> 0; ) { c[i] = pop(); }
+ for (int i = N; i --> 0; ) { b[i] = pop(); }
+ for (int i = N; i --> 0; ) { a[i] = pop(); }
+
+ for (int i = 0; i < N; i++) {
+ push(fn(a[i], b[i], c[i]));
}
};
@@ -552,55 +555,19 @@
} break;
case Inst::kLoad: {
- int ix = u8();
- push(stack[ix + 0]);
- } break;
-
- case Inst::kLoad2: {
- int ix = u8();
- push(stack[ix + 0]);
- push(stack[ix + 1]);
- } break;
-
- case Inst::kLoad3: {
- int ix = u8();
- push(stack[ix + 0]);
- push(stack[ix + 1]);
- push(stack[ix + 2]);
- } break;
-
- case Inst::kLoad4: {
- int ix = u8();
- push(stack[ix + 0]);
- push(stack[ix + 1]);
- push(stack[ix + 2]);
- push(stack[ix + 3]);
+ int N = u8(),
+ ix = u8();
+ for (int i = 0; i < N; ++i) {
+ push(stack[ix + i]);
+ }
} break;
case Inst::kLoadUniform: {
- int ix = u8();
- push(uniform[ix]);
- } break;
-
- case Inst::kLoadUniform2: {
- int ix = u8();
- push(uniform[ix + 0]);
- push(uniform[ix + 1]);
- } break;
-
- case Inst::kLoadUniform3: {
- int ix = u8();
- push(uniform[ix + 0]);
- push(uniform[ix + 1]);
- push(uniform[ix + 2]);
- } break;
-
- case Inst::kLoadUniform4: {
- int ix = u8();
- push(uniform[ix + 0]);
- push(uniform[ix + 1]);
- push(uniform[ix + 2]);
- push(uniform[ix + 3]);
+ int N = u8(),
+ ix = u8();
+ for (int i = 0; i < N; ++i) {
+ push(uniform[ix + i]);
+ }
} break;
case Inst::kLoadFragCoord: {
@@ -612,56 +579,22 @@
} break;
case Inst::kStore: {
- int ix = u8();
- stack[ix + 0] = pop();
+ int N = u8(),
+ ix = u8();
+ for (int i = N; i --> 0; ) {
+ stack[ix + i] = pop();
+ }
} break;
- case Inst::kStore2: {
- int ix = u8();
- stack[ix + 1] = pop();
- stack[ix + 0] = pop();
- } break;
-
- case Inst::kStore3: {
- int ix = u8();
- stack[ix + 2] = pop();
- stack[ix + 1] = pop();
- stack[ix + 0] = pop();
- } break;
-
- case Inst::kStore4: {
- int ix = u8();
- stack[ix + 3] = pop();
- stack[ix + 2] = pop();
- stack[ix + 1] = pop();
- stack[ix + 0] = pop();
- } break;
-
-
case Inst::kPushImmediate: {
push(bit_cast(p->splat(u32())));
} break;
case Inst::kDup: {
- push(stack[stack.size() - 1]);
- } break;
-
- case Inst::kDup2: {
- push(stack[stack.size() - 2]);
- push(stack[stack.size() - 2]);
- } break;
-
- case Inst::kDup3: {
- push(stack[stack.size() - 3]);
- push(stack[stack.size() - 3]);
- push(stack[stack.size() - 3]);
- } break;
-
- case Inst::kDup4: {
- push(stack[stack.size() - 4]);
- push(stack[stack.size() - 4]);
- push(stack[stack.size() - 4]);
- push(stack[stack.size() - 4]);
+ int N = u8();
+ for (int i = 0; i < N; ++i) {
+ push(stack[stack.size() - N]);
+ }
} break;
case Inst::kSwizzle: {
@@ -674,104 +607,34 @@
}
} break;
- case Inst::kAddF:
- case Inst::kAddF2:
- case Inst::kAddF3:
- case Inst::kAddF4:
- case Inst::kAddFN: binary(Inst::kAddF, std::plus<>{}, true); break;
-
- case Inst::kSubtractF:
- case Inst::kSubtractF2:
- case Inst::kSubtractF3:
- case Inst::kSubtractF4:
- case Inst::kSubtractFN: binary(Inst::kSubtractF, std::minus<>{}, true); break;
-
- case Inst::kMultiplyF:
- case Inst::kMultiplyF2:
- case Inst::kMultiplyF3:
- case Inst::kMultiplyF4:
- case Inst::kMultiplyFN: binary(Inst::kMultiplyF, std::multiplies<>{}, true); break;
-
- case Inst::kDivideF:
- case Inst::kDivideF2:
- case Inst::kDivideF3:
- case Inst::kDivideF4:
- case Inst::kDivideFN: binary(Inst::kDivideF, std::divides<>{}, true); break;
+ case Inst::kAddF: binary(std::plus<>{}); break;
+ case Inst::kSubtractF: binary(std::minus<>{}); break;
+ case Inst::kMultiplyF: binary(std::multiplies<>{}); break;
+ case Inst::kDivideF: binary(std::divides<>{}); break;
+ case Inst::kNegateF: unary(std::negate<>{}); break;
case Inst::kMinF:
- case Inst::kMinF2:
- case Inst::kMinF3:
- case Inst::kMinF4:
- binary(Inst::kMinF, [](skvm::F32 x, skvm::F32 y) { return skvm::min(x,y); });
+ binary([](skvm::F32 x, skvm::F32 y) { return skvm::min(x,y); });
break;
case Inst::kMaxF:
- case Inst::kMaxF2:
- case Inst::kMaxF3:
- case Inst::kMaxF4:
- binary(Inst::kMaxF, [](skvm::F32 x, skvm::F32 y) { return skvm::max(x,y); });
+ binary([](skvm::F32 x, skvm::F32 y) { return skvm::max(x,y); });
break;
- case Inst::kNegateF:
- case Inst::kNegateF2:
- case Inst::kNegateF3:
- case Inst::kNegateF4:
- case Inst::kNegateFN: unary(Inst::kNegateF, std::negate<>{}, true); break;
-
case Inst::kPow:
- case Inst::kPow2:
- case Inst::kPow3:
- case Inst::kPow4:
- binary(Inst::kPow, [](skvm::F32 x, skvm::F32 y) { return skvm::approx_powf(x,y); });
+ binary([](skvm::F32 x, skvm::F32 y) { return skvm::approx_powf(x,y); });
break;
case Inst::kLerp:
- case Inst::kLerp2:
- case Inst::kLerp3:
- case Inst::kLerp4: {
- int N = (int)Inst::kLerp - (int)inst + 1;
+ ternary([](skvm::F32 x, skvm::F32 y, skvm::F32 t) { return skvm::lerp(x, y, t); });
+ break;
- skvm::F32 t[4],
- b[4],
- a[4];
- for (int i = N; i --> 0; ) { t[i] = pop(); }
- for (int i = N; i --> 0; ) { b[i] = pop(); }
- for (int i = N; i --> 0; ) { a[i] = pop(); }
-
- for (int i = 0; i < N; i++) {
- push(skvm::lerp(a[i], b[i], t[i]));
- }
- } break;
-
- case Inst::kATan:
- case Inst::kATan2:
- case Inst::kATan3:
- case Inst::kATan4: unary(Inst::kATan, skvm::approx_atan); break;
-
- case Inst::kCeil:
- case Inst::kCeil2:
- case Inst::kCeil3:
- case Inst::kCeil4: unary(Inst::kCeil, skvm::ceil); break;
-
- case Inst::kFloor:
- case Inst::kFloor2:
- case Inst::kFloor3:
- case Inst::kFloor4: unary(Inst::kFloor, skvm::floor); break;
-
- case Inst::kFract:
- case Inst::kFract2:
- case Inst::kFract3:
- case Inst::kFract4: unary(Inst::kFract, skvm::fract); break;
-
- case Inst::kSqrt:
- case Inst::kSqrt2:
- case Inst::kSqrt3:
- case Inst::kSqrt4: unary(Inst::kSqrt, skvm::sqrt); break;
-
- case Inst::kSin:
- case Inst::kSin2:
- case Inst::kSin3:
- case Inst::kSin4: unary(Inst::kSin, skvm::approx_sin); break;
+ case Inst::kATan: unary(skvm::approx_atan); break;
+ case Inst::kCeil: unary(skvm::ceil); break;
+ case Inst::kFloor: unary(skvm::floor); break;
+ case Inst::kFract: unary(skvm::fract); break;
+ case Inst::kSqrt: unary(skvm::sqrt); break;
+ case Inst::kSin: unary(skvm::approx_sin); break;
case Inst::kMatrixMultiply: {
// Computes M = A*B (all stored column major)
@@ -798,11 +661,9 @@
case Inst::kMaskPush: break;
case Inst::kMaskNegate: break;
- case Inst::kCompareFLT: {
- skvm::F32 x = pop(),
- a = pop();
- push(bit_cast(a<x));
- } break;
+ case Inst::kCompareFLT:
+ binary([](skvm::F32 x, skvm::F32 y) { return bit_cast(x<y); });
+ break;
case Inst::kMaskBlend: {
std::vector<skvm::F32> if_true,
diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp
index bbd3c99..ae573c6 100644
--- a/src/sksl/SkSLByteCode.cpp
+++ b/src/sksl/SkSLByteCode.cpp
@@ -14,6 +14,7 @@
#include "src/sksl/SkSLByteCodeGenerator.h"
#include "src/sksl/SkSLExternalValue.h"
+#include <functional>
#include <vector>
namespace SkSL {
@@ -34,24 +35,24 @@
#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
-#define VECTOR_DISASSEMBLE(op, text) \
- case ByteCodeInstruction::op: printf(text); break; \
- case ByteCodeInstruction::op##2: printf(text "2"); break; \
- case ByteCodeInstruction::op##3: printf(text "3"); break; \
- case ByteCodeInstruction::op##4: printf(text "4"); break;
+#define DISASSEMBLE_COUNT(op, text) \
+ case ByteCodeInstruction::op: printf(text " %d", READ8()); break;
-#define VECTOR_MATRIX_DISASSEMBLE(op, text) \
- VECTOR_DISASSEMBLE(op, text) \
- case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
+#define DISASSEMBLE_COUNT_SLOT(op, text) \
+ case ByteCodeInstruction::op: { \
+ int N = READ8(), \
+ slot = READ8(); \
+ printf(text " %d [%d]", N, slot); \
+ } break;
static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
auto inst = READ_INST();
printf("%04x ", (int)inst);
switch (inst) {
- VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
- VECTOR_DISASSEMBLE(kAddI, "addi")
- case ByteCodeInstruction::kAndB: printf("andb"); break;
- VECTOR_DISASSEMBLE(kATan, "atan")
+ DISASSEMBLE_COUNT(kAddF, "addf")
+ DISASSEMBLE_COUNT(kAddI, "addi")
+ DISASSEMBLE_COUNT(kAndB, "andb")
+ DISASSEMBLE_COUNT(kATan, "atan")
case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
case ByteCodeInstruction::kCallExternal: {
@@ -61,55 +62,44 @@
printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
break;
}
- VECTOR_DISASSEMBLE(kCeil, "ceil")
+ DISASSEMBLE_COUNT(kCeil, "ceil")
case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
- VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
- VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
- VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
- VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
- VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
- VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
- VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
- VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
- VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
- VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
- VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
- VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
- VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
- VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
- VECTOR_DISASSEMBLE(kCompareULT, "compareult")
- VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
- VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
- VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
- VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
- VECTOR_DISASSEMBLE(kCos, "cos")
- VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
- VECTOR_DISASSEMBLE(kDivideS, "divideS")
- VECTOR_DISASSEMBLE(kDivideU, "divideu")
- VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
- VECTOR_DISASSEMBLE(kFloor, "floor")
- VECTOR_DISASSEMBLE(kFract, "fract")
+ DISASSEMBLE_COUNT(kCompareIEQ, "compareieq")
+ DISASSEMBLE_COUNT(kCompareINEQ, "compareineq")
+ DISASSEMBLE_COUNT(kCompareFEQ, "comparefeq")
+ DISASSEMBLE_COUNT(kCompareFNEQ, "comparefneq")
+ DISASSEMBLE_COUNT(kCompareFGT, "comparefgt")
+ DISASSEMBLE_COUNT(kCompareFGTEQ, "comparefgteq")
+ DISASSEMBLE_COUNT(kCompareFLT, "compareflt")
+ DISASSEMBLE_COUNT(kCompareFLTEQ, "compareflteq")
+ DISASSEMBLE_COUNT(kCompareSGT, "comparesgt")
+ DISASSEMBLE_COUNT(kCompareSGTEQ, "comparesgteq")
+ DISASSEMBLE_COUNT(kCompareSLT, "compareslt")
+ DISASSEMBLE_COUNT(kCompareSLTEQ, "compareslteq")
+ DISASSEMBLE_COUNT(kCompareUGT, "compareugt")
+ DISASSEMBLE_COUNT(kCompareUGTEQ, "compareugteq")
+ DISASSEMBLE_COUNT(kCompareULT, "compareult")
+ DISASSEMBLE_COUNT(kCompareULTEQ, "compareulteq")
+ DISASSEMBLE_COUNT(kConvertFtoI, "convertftoi")
+ DISASSEMBLE_COUNT(kConvertStoF, "convertstof")
+ DISASSEMBLE_COUNT(kConvertUtoF, "convertutof")
+ DISASSEMBLE_COUNT(kCos, "cos")
+ DISASSEMBLE_COUNT(kDivideF, "dividef")
+ DISASSEMBLE_COUNT(kDivideS, "divideS")
+ DISASSEMBLE_COUNT(kDivideU, "divideu")
+ DISASSEMBLE_COUNT(kDup, "dup")
+ DISASSEMBLE_COUNT(kFloor, "floor")
+ DISASSEMBLE_COUNT(kFract, "fract")
case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
- VECTOR_DISASSEMBLE(kLerp, "lerp")
- case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
- case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
- case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
- case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
- case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
- case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
- case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
- case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
- case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
- case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
- case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
- case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
- case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
- case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
- break;
- case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
- break;
+ DISASSEMBLE_COUNT(kLerp, "lerp")
+ DISASSEMBLE_COUNT_SLOT(kLoad, "load")
+ DISASSEMBLE_COUNT_SLOT(kLoadGlobal, "loadglobal")
+ DISASSEMBLE_COUNT_SLOT(kLoadUniform, "loaduniform")
+ DISASSEMBLE_COUNT(kLoadExtended, "loadextended")
+ DISASSEMBLE_COUNT(kLoadExtendedGlobal, "loadextendedglobal")
+ DISASSEMBLE_COUNT(kLoadExtendedUniform, "loadextendeduniform")
case ByteCodeInstruction::kLoadFragCoord: printf("loadfragcoord"); break;
case ByteCodeInstruction::kMatrixToMatrix: {
int srcCols = READ8();
@@ -126,34 +116,31 @@
printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
break;
}
- VECTOR_DISASSEMBLE(kMaxF, "maxf")
- VECTOR_DISASSEMBLE(kMaxS, "maxs")
- VECTOR_DISASSEMBLE(kMinF, "minf")
- VECTOR_DISASSEMBLE(kMinS, "mins")
- VECTOR_DISASSEMBLE(kMix, "mix")
- VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
- VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
- VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
- VECTOR_DISASSEMBLE(kNegateI, "negatei")
- VECTOR_DISASSEMBLE(kNotB, "notb")
- case ByteCodeInstruction::kOrB: printf("orb"); break;
- VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
- VECTOR_DISASSEMBLE(kPow, "pow")
+ DISASSEMBLE_COUNT(kMaxF, "maxf")
+ DISASSEMBLE_COUNT(kMaxS, "maxs")
+ DISASSEMBLE_COUNT(kMinF, "minf")
+ DISASSEMBLE_COUNT(kMinS, "mins")
+ DISASSEMBLE_COUNT(kMix, "mix")
+ DISASSEMBLE_COUNT(kMultiplyF, "multiplyf")
+ DISASSEMBLE_COUNT(kMultiplyI, "multiplyi")
+ DISASSEMBLE_COUNT(kNegateF, "negatef")
+ DISASSEMBLE_COUNT(kNegateI, "negatei")
+ DISASSEMBLE_COUNT(kNotB, "notb")
+ DISASSEMBLE_COUNT(kOrB, "orb")
+ DISASSEMBLE_COUNT(kPop, "pop")
+ DISASSEMBLE_COUNT(kPow, "pow")
case ByteCodeInstruction::kPushImmediate: {
uint32_t v = READ32();
union { uint32_t u; float f; } pun = { v };
printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
break;
}
- case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
- case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
- case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
- case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
- VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
- VECTOR_DISASSEMBLE(kRemainderS, "remainders")
- VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
- case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
- case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
+ DISASSEMBLE_COUNT_SLOT(kReadExternal, "readexternal")
+ DISASSEMBLE_COUNT(kRemainderF, "remainderf")
+ DISASSEMBLE_COUNT(kRemainderS, "remainders")
+ DISASSEMBLE_COUNT(kRemainderU, "remainderu")
+ DISASSEMBLE_COUNT(kReserve, "reserve")
+ DISASSEMBLE_COUNT(kReturn, "return")
case ByteCodeInstruction::kSampleExplicit: printf("sample %d", READ8()); break;
case ByteCodeInstruction::kSampleMatrix: printf("sampleMtx %d", READ8()); break;
case ByteCodeInstruction::kScalarToMatrix: {
@@ -165,21 +152,14 @@
case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
- VECTOR_DISASSEMBLE(kSin, "sin")
- VECTOR_DISASSEMBLE(kSqrt, "sqrt")
- case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
- case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
- case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
- case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
- case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
- case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
- case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
- case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
- case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
- case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
- break;
- VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
- VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
+ DISASSEMBLE_COUNT(kSin, "sin")
+ DISASSEMBLE_COUNT(kSqrt, "sqrt")
+ DISASSEMBLE_COUNT_SLOT(kStore, "store")
+ DISASSEMBLE_COUNT_SLOT(kStoreGlobal, "storeglobal")
+ DISASSEMBLE_COUNT(kStoreExtended, "storeextended")
+ DISASSEMBLE_COUNT(kStoreExtendedGlobal, "storeextendedglobal")
+ DISASSEMBLE_COUNT(kSubtractF, "subtractf")
+ DISASSEMBLE_COUNT(kSubtractI, "subtracti")
case ByteCodeInstruction::kSwizzle: {
printf("swizzle %d, ", READ8());
int count = READ8();
@@ -189,12 +169,9 @@
}
break;
}
- VECTOR_DISASSEMBLE(kTan, "tan")
- case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
- case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
- case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
- case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
- case ByteCodeInstruction::kXorB: printf("xorb"); break;
+ DISASSEMBLE_COUNT(kTan, "tan")
+ DISASSEMBLE_COUNT_SLOT(kWriteExternal, "writeexternal")
+ DISASSEMBLE_COUNT(kXorB, "xorb")
case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
@@ -216,117 +193,47 @@
return ip;
}
-#define VECTOR_BINARY_OP(base, field, op) \
- case ByteCodeInstruction::base ## 4: { \
- sp[-4] = sp[-4].field op sp[0].field; \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 3: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = sp[count].field op sp[0].field; \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 2: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = sp[count].field op sp[0].field; \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = sp[count].field op sp[0].field; \
- POP(); \
- continue; \
- }
-
// A naive implementation of / or % using skvx operations will likely crash with a divide by zero
// in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
-#define VECTOR_BINARY_MASKED_OP(base, field, op) \
- case ByteCodeInstruction::base ## 4: { \
- for (int i = 0; i < VecWidth; ++i) { \
- if (mask()[i]) { \
- sp[-4].field[i] op ## = sp[0].field[i]; \
- } \
- } \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 3: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- for (int i = 0; i < VecWidth; ++i) { \
- if (mask()[i]) { \
- sp[count].field[i] op ## = sp[0].field[i]; \
- } \
- } \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 2: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- for (int i = 0; i < VecWidth; ++i) { \
- if (mask()[i]) { \
- sp[count].field[i] op ## = sp[0].field[i]; \
- } \
- } \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- for (int i = 0; i < VecWidth; ++i) { \
- if (mask()[i]) { \
- sp[count].field[i] op ## = sp[0].field[i]; \
- } \
- } \
- POP(); \
- continue; \
- }
+// TODO: Would it be better to do this with a select of (lane, 1) based on mask?
+#define VECTOR_BINARY_MASKED_OP(inst, field, op) \
+ case ByteCodeInstruction::inst: { \
+ int count = READ8(); \
+ for (int i = count; i > 0; --i) { \
+ for (int j = 0; j < VecWidth; ++j) { \
+ if (mask()[j]) { \
+ sp[-count].field[j] op ## = sp[0].field[j]; \
+ } \
+ } \
+ POP(); \
+ } \
+ } continue;
-
-#define VECTOR_MATRIX_BINARY_OP(base, field, op) \
- VECTOR_BINARY_OP(base, field, op) \
- case ByteCodeInstruction::base ## N: { \
+#define VECTOR_BINARY_OP(inst, field, op) \
+ case ByteCodeInstruction::inst: { \
int count = READ8(); \
for (int i = count; i > 0; --i) { \
sp[-count] = sp[-count].field op sp[0].field; \
POP(); \
} \
- continue; \
- }
+ } continue;
-#define VECTOR_BINARY_FN(base, field, fn) \
- case ByteCodeInstruction::base ## 4: { \
- sp[-4] = fn(sp[-4].field, sp[0].field); \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 3: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = fn(sp[count].field, sp[0].field); \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base ## 2: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = fn(sp[count].field, sp[0].field); \
- POP(); \
- [[fallthrough]]; \
- } \
- case ByteCodeInstruction::base: { \
- int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
- sp[count] = fn(sp[count].field, sp[0].field); \
- POP(); \
- continue; \
- }
+#define VECTOR_BINARY_FN(inst, field, fn) \
+ case ByteCodeInstruction::inst: { \
+ int count = READ8(); \
+ for (int i = count; i > 0; --i) { \
+ sp[-count] = fn(sp[-count].field, sp[0].field); \
+ POP(); \
+ } \
+ } continue;
-#define VECTOR_UNARY_FN(base, fn, field) \
- case ByteCodeInstruction::base ## 4: sp[-3] = fn(sp[-3].field); [[fallthrough]]; \
- case ByteCodeInstruction::base ## 3: sp[-2] = fn(sp[-2].field); [[fallthrough]]; \
- case ByteCodeInstruction::base ## 2: sp[-1] = fn(sp[-1].field); [[fallthrough]]; \
- case ByteCodeInstruction::base: sp[ 0] = fn(sp[ 0].field); \
- continue;
+#define VECTOR_UNARY_FN(inst, fn, field) \
+ case ByteCodeInstruction::inst: { \
+ int count = READ8(); \
+ for (int i = count; i --> 0; ) { \
+ sp[-i] = fn(sp[-i].field); \
+ } \
+ } continue;
union VValue {
VValue() {}
@@ -506,27 +413,14 @@
ByteCodeInstruction inst = READ_INST();
switch (inst) {
- VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
+ VECTOR_BINARY_OP(kAddF, fFloat, +)
VECTOR_BINARY_OP(kAddI, fSigned, +)
// Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
- case ByteCodeInstruction::kAndB:
- sp[-1] = sp[-1].fSigned & sp[0].fSigned;
- POP();
- continue;
- case ByteCodeInstruction::kNotB4: sp[-3] = ~sp[-3].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNotB3: sp[-2] = ~sp[-2].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNotB2: sp[-1] = ~sp[-1].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNotB: sp[ 0] = ~sp[ 0].fSigned;
- continue;
- case ByteCodeInstruction::kOrB:
- sp[-1] = sp[-1].fSigned | sp[0].fSigned;
- POP();
- continue;
- case ByteCodeInstruction::kXorB:
- sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
- POP();
- continue;
+ VECTOR_BINARY_OP(kAndB, fSigned, &)
+ VECTOR_BINARY_OP(kOrB, fSigned, |)
+ VECTOR_BINARY_OP(kXorB, fSigned, ^)
+ VECTOR_UNARY_FN(kNotB, std::bit_not<>{}, fSigned)
case ByteCodeInstruction::kBranch:
ip = code + READ16();
@@ -548,13 +442,11 @@
stack[i].fFloat = 0.0f;
}
}
- continue;
- }
+ } continue;
- case ByteCodeInstruction::kCallExternal: {
+ case ByteCodeInstruction::kCallExternal:
CallExternal(byteCode, ip, sp, baseIndex, mask());
continue;
- }
VECTOR_UNARY_FN(kCeil, skvx::ceil, fFloat)
@@ -563,90 +455,40 @@
if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
return false;
}
- continue;
- }
+ } continue;
- VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
- VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
- VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
- VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
- VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
- VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
- VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
- VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
+ VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
+ VECTOR_BINARY_OP(kCompareFEQ, fFloat, ==)
+ VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
+ VECTOR_BINARY_OP(kCompareFNEQ, fFloat, !=)
+ VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
+ VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
+ VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
+ VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
- VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
- VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
- VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
- VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
- VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
+ VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
+ VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
+ VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
+ VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
+ VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
- VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
+ VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
- case ByteCodeInstruction::kConvertFtoI4:
- sp[-3] = skvx::cast<int>(sp[-3].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertFtoI3:
- sp[-2] = skvx::cast<int>(sp[-2].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertFtoI2:
- sp[-1] = skvx::cast<int>(sp[-1].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertFtoI:
- sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
- continue;
-
- case ByteCodeInstruction::kConvertStoF4:
- sp[-3] = skvx::cast<float>(sp[-3].fSigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertStoF3:
- sp[-2] = skvx::cast<float>(sp[-2].fSigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertStoF2:
- sp[-1] = skvx::cast<float>(sp[-1].fSigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertStoF:
- sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
- continue;
-
- case ByteCodeInstruction::kConvertUtoF4:
- sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertUtoF3:
- sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertUtoF2:
- sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
- [[fallthrough]];
- case ByteCodeInstruction::kConvertUtoF:
- sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
- continue;
+ VECTOR_UNARY_FN(kConvertFtoI, skvx::cast<int>, fFloat)
+ VECTOR_UNARY_FN(kConvertStoF, skvx::cast<float>, fSigned)
+ VECTOR_UNARY_FN(kConvertUtoF, skvx::cast<float>, fUnsigned)
VECTOR_UNARY_FN(kCos, skvx::cos, fFloat)
VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
- VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
+ VECTOR_BINARY_OP(kDivideF, fFloat, /)
- case ByteCodeInstruction::kDup4:
- PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
- [[fallthrough]];
- case ByteCodeInstruction::kDup3:
- PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
- [[fallthrough]];
- case ByteCodeInstruction::kDup2:
- PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
- [[fallthrough]];
- case ByteCodeInstruction::kDup :
- PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
- continue;
-
- case ByteCodeInstruction::kDupN: {
+ case ByteCodeInstruction::kDup: {
int count = READ8();
memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
sp += count;
- continue;
- }
+ } continue;
VECTOR_UNARY_FN(kFloor, skvx::floor, fFloat)
VECTOR_UNARY_FN(kFract, skvx::fract, fFloat)
@@ -661,11 +503,8 @@
Inverse4x4(sp);
continue;
- case ByteCodeInstruction::kLerp4:
- case ByteCodeInstruction::kLerp3:
- case ByteCodeInstruction::kLerp2:
case ByteCodeInstruction::kLerp: {
- int count = (int)ByteCodeInstruction::kLerp - (int)inst + 1;
+ int count = READ8();
VValue* T = sp - count + 1,
* B = T - count,
* A = B - count;
@@ -673,39 +512,30 @@
A[i].fFloat += (B[i].fFloat - A[i].fFloat) * T[i].fFloat;
}
sp -= 2 * count;
- continue;
- }
+ } continue;
- case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3]; [[fallthrough]];
- case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2]; [[fallthrough]];
- case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1]; [[fallthrough]];
- case ByteCodeInstruction::kLoad: sp[1] = stack[*ip + 0];
- ++ip;
- sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
- continue;
+ case ByteCodeInstruction::kLoad: {
+ int count = READ8(),
+ slot = READ8();
+ memcpy(sp + 1, stack + slot, count * sizeof(VValue));
+ sp += count;
+ } continue;
- case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3]; [[fallthrough]];
- case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2]; [[fallthrough]];
- case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1]; [[fallthrough]];
- case ByteCodeInstruction::kLoadGlobal: sp[1] = globals[*ip + 0];
- ++ip;
- sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
- continue;
+ case ByteCodeInstruction::kLoadGlobal: {
+ int count = READ8(),
+ slot = READ8();
+ memcpy(sp + 1, globals + slot, count * sizeof(VValue));
+ sp += count;
+ } continue;
- case ByteCodeInstruction::kLoadUniform4:
- sp[4].fFloat = uniforms[*ip + 3];
- [[fallthrough]];
- case ByteCodeInstruction::kLoadUniform3:
- sp[3].fFloat = uniforms[*ip + 2];
- [[fallthrough]];
- case ByteCodeInstruction::kLoadUniform2:
- sp[2].fFloat = uniforms[*ip + 1];
- [[fallthrough]];
- case ByteCodeInstruction::kLoadUniform:
- sp[1].fFloat = uniforms[*ip + 0];
- ++ip;
- sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
- continue;
+ case ByteCodeInstruction::kLoadUniform: {
+ int count = READ8(),
+ slot = READ8();
+ for (int i = 0; i < count; ++i) {
+ sp[i + 1].fFloat = uniforms[slot + i];
+ }
+ sp += count;
+ } continue;
case ByteCodeInstruction::kLoadExtended: {
int count = READ8();
@@ -719,8 +549,7 @@
}
}
sp += count;
- continue;
- }
+ } continue;
case ByteCodeInstruction::kLoadExtendedGlobal: {
int count = READ8();
@@ -734,8 +563,7 @@
}
}
sp += count;
- continue;
- }
+ } continue;
case ByteCodeInstruction::kLoadExtendedUniform: {
int count = READ8();
@@ -749,8 +577,7 @@
}
}
sp += count;
- continue;
- }
+ } continue;
case ByteCodeInstruction::kMatrixToMatrix: {
int srcCols = READ8();
@@ -774,8 +601,7 @@
PUSH(tmp[c*4 + r]);
}
}
- continue;
- }
+ } continue;
case ByteCodeInstruction::kMatrixMultiply: {
int lCols = READ8();
@@ -795,19 +621,15 @@
sp -= (lCols * lRows) + (rCols * rRows);
memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
sp += (rCols * lRows);
- continue;
- }
+ } continue;
VECTOR_BINARY_FN(kMaxF, fFloat, skvx::max)
VECTOR_BINARY_FN(kMaxS, fSigned, skvx::max)
VECTOR_BINARY_FN(kMinF, fFloat, skvx::min)
VECTOR_BINARY_FN(kMinS, fSigned, skvx::min)
- case ByteCodeInstruction::kMix4:
- case ByteCodeInstruction::kMix3:
- case ByteCodeInstruction::kMix2:
case ByteCodeInstruction::kMix: {
- int count = (int)ByteCodeInstruction::kMix - (int)inst + 1;
+ int count = READ8();
for (int i = count; i --> 0; ) {
// GLSL's arguments are mix(else, true, cond)
sp[-(2*count + i)] = skvx::if_then_else(sp[-( i)].fSigned,
@@ -815,39 +637,15 @@
sp[-(2*count + i)].fFloat);
}
sp -= 2 * count;
- continue;
- }
+ } continue;
VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
- VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
+ VECTOR_BINARY_OP(kMultiplyF, fFloat, *)
- case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat; [[fallthrough]];
- case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat; [[fallthrough]];
- case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat; [[fallthrough]];
- case ByteCodeInstruction::kNegateF: sp[ 0] = -sp[ 0].fFloat;
- continue;
+ VECTOR_UNARY_FN(kNegateF, std::negate<>{}, fFloat)
+ VECTOR_UNARY_FN(kNegateI, std::negate<>{}, fSigned)
- case ByteCodeInstruction::kNegateFN: {
- int count = READ8();
- for (int i = count - 1; i >= 0; --i) {
- sp[-i] = -sp[-i].fFloat;
- }
- continue;
- }
-
- case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned; [[fallthrough]];
- case ByteCodeInstruction::kNegateI: sp[ 0] = -sp[ 0].fSigned;
- continue;
-
- case ByteCodeInstruction::kPop4: POP(); [[fallthrough]];
- case ByteCodeInstruction::kPop3: POP(); [[fallthrough]];
- case ByteCodeInstruction::kPop2: POP(); [[fallthrough]];
- case ByteCodeInstruction::kPop: POP();
- continue;
-
- case ByteCodeInstruction::kPopN:
+ case ByteCodeInstruction::kPop:
sp -= READ8();
continue;
@@ -857,25 +655,22 @@
PUSH(U32(READ32()));
continue;
- case ByteCodeInstruction::kReadExternal:
- case ByteCodeInstruction::kReadExternal2:
- case ByteCodeInstruction::kReadExternal3:
- case ByteCodeInstruction::kReadExternal4: {
- int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
- int src = READ8();
+ case ByteCodeInstruction::kReadExternal: {
+ int count = READ8(),
+ slot = READ8();
+ SkASSERT(count <= 4);
float tmp[4];
I32 m = mask();
for (int i = 0; i < VecWidth; ++i) {
if (m[i]) {
- byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
+ byteCode->fExternalValues[slot]->read(baseIndex + i, tmp);
for (int j = 0; j < count; ++j) {
sp[j + 1].fFloat[i] = tmp[j];
}
}
}
sp += count;
- continue;
- }
+ } continue;
VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
@@ -923,9 +718,8 @@
code = frame.fCode;
ip = frame.fIP;
frames.pop_back();
- continue;
}
- }
+ } continue;
case ByteCodeInstruction::kScalarToMatrix: {
int cols = READ8();
@@ -936,8 +730,7 @@
PUSH(c == r ? v : F32(0.0f));
}
}
- continue;
- }
+ } continue;
case ByteCodeInstruction::kShiftLeft:
sp[0] = sp[0].fSigned << READ8();
@@ -952,33 +745,23 @@
VECTOR_UNARY_FN(kSin, skvx::sin, fFloat)
VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
- case ByteCodeInstruction::kStore4:
- stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStore3:
- stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStore2:
- stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStore:
- stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
- ++ip;
- continue;
+ case ByteCodeInstruction::kStore: {
+ int count = READ8(),
+ slot = READ8();
+ auto m = mask();
+ for (int i = count; i --> 0; ) {
+ stack[slot+i] = skvx::if_then_else(m, POP().fFloat, stack[slot+i].fFloat);
+ }
+ } continue;
- case ByteCodeInstruction::kStoreGlobal4:
- globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStoreGlobal3:
- globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStoreGlobal2:
- globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
- [[fallthrough]];
- case ByteCodeInstruction::kStoreGlobal:
- globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
- ++ip;
- continue;
+ case ByteCodeInstruction::kStoreGlobal: {
+ int count = READ8(),
+ slot = READ8();
+ auto m = mask();
+ for (int i = count; i --> 0; ) {
+ globals[slot+i] = skvx::if_then_else(m, POP().fFloat, globals[slot+i].fFloat);
+ }
+ } continue;
case ByteCodeInstruction::kStoreExtended: {
int count = READ8();
@@ -993,8 +776,8 @@
}
}
sp -= count;
- continue;
- }
+ } continue;
+
case ByteCodeInstruction::kStoreExtendedGlobal: {
int count = READ8();
I32 target = POP().fSigned;
@@ -1008,11 +791,10 @@
}
}
sp -= count;
- continue;
- }
+ } continue;
VECTOR_BINARY_OP(kSubtractI, fSigned, -)
- VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
+ VECTOR_BINARY_OP(kSubtractF, fFloat, -)
case ByteCodeInstruction::kSwizzle: {
VValue tmp[4];
@@ -1022,18 +804,15 @@
for (int i = READ8() - 1; i >= 0; --i) {
PUSH(tmp[READ8()]);
}
- continue;
- }
+ } continue;
VECTOR_UNARY_FN(kATan, skvx::atan, fFloat)
VECTOR_UNARY_FN(kTan, skvx::tan, fFloat)
- case ByteCodeInstruction::kWriteExternal4:
- case ByteCodeInstruction::kWriteExternal3:
- case ByteCodeInstruction::kWriteExternal2:
case ByteCodeInstruction::kWriteExternal: {
- int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
- int target = READ8();
+ int count = READ8(),
+ slot = READ8();
+ SkASSERT(count <= 4);
float tmp[4];
I32 m = mask();
sp -= count;
@@ -1042,11 +821,10 @@
for (int j = 0; j < count; ++j) {
tmp[j] = sp[j + 1].fFloat[i];
}
- byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
+ byteCode->fExternalValues[slot]->write(baseIndex + i, tmp);
}
}
- continue;
- }
+ } continue;
case ByteCodeInstruction::kMaskPush:
condPtr[1] = POP().fSigned;
@@ -1067,15 +845,13 @@
sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
--sp;
}
- continue;
- }
+ } continue;
case ByteCodeInstruction::kBranchIfAllFalse: {
int target = READ16();
if (!skvx::any(mask())) {
ip = code + target;
}
- continue;
- }
+ } continue;
case ByteCodeInstruction::kLoopBegin:
contPtr[1] = 0;
@@ -1099,8 +875,7 @@
I32 m = mask();
*contPtr |= m;
*loopPtr &= ~m;
- continue;
- }
+ } continue;
case ByteCodeInstruction::kLoadFragCoord:
case ByteCodeInstruction::kSampleExplicit:
diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h
index 9a1b19a..3832b9b 100644
--- a/src/sksl/SkSLByteCode.h
+++ b/src/sksl/SkSLByteCode.h
@@ -19,65 +19,61 @@
class ExternalValue;
struct FunctionDeclaration;
-#define VECTOR(name) name ## 4, name ## 3, name ## 2, name
-#define VECTOR_MATRIX(name) name ## N, name ## 4, name ## 3, name ## 2, name
-
enum class ByteCodeInstruction : uint16_t {
// B = bool, F = float, I = int, S = signed, U = unsigned
- VECTOR_MATRIX(kAddF),
- VECTOR(kAddI),
- kAndB,
- VECTOR(kATan),
+
+ kAddF, // N
+ kAddI, // N
+ kAndB, // N
+ kATan, // N
kBranch,
// Followed by a byte indicating the index of the function to call
kCall,
// Followed by three bytes indicating: the number of argument slots, the number of return slots,
// and the index of the external value to call
kCallExternal,
- VECTOR(kCeil),
+ kCeil, // N
// For dynamic array access: Followed by byte indicating length of array
kClampIndex,
- VECTOR(kCompareIEQ),
- VECTOR(kCompareINEQ),
- VECTOR_MATRIX(kCompareFEQ),
- VECTOR_MATRIX(kCompareFNEQ),
- VECTOR(kCompareFGT),
- VECTOR(kCompareFGTEQ),
- VECTOR(kCompareFLT),
- VECTOR(kCompareFLTEQ),
- VECTOR(kCompareSGT),
- VECTOR(kCompareSGTEQ),
- VECTOR(kCompareSLT),
- VECTOR(kCompareSLTEQ),
- VECTOR(kCompareUGT),
- VECTOR(kCompareUGTEQ),
- VECTOR(kCompareULT),
- VECTOR(kCompareULTEQ),
- VECTOR(kConvertFtoI),
- VECTOR(kConvertStoF),
- VECTOR(kConvertUtoF),
- VECTOR(kCos),
- VECTOR_MATRIX(kDivideF),
- VECTOR(kDivideS),
- VECTOR(kDivideU),
- // Duplicates the top stack value
- VECTOR_MATRIX(kDup),
- VECTOR(kFloor),
- VECTOR(kFract),
+ kCompareIEQ, // N
+ kCompareINEQ, // N
+ kCompareFEQ, // N
+ kCompareFNEQ, // N
+ kCompareFGT, // N
+ kCompareFGTEQ, // N
+ kCompareFLT, // N
+ kCompareFLTEQ, // N
+ kCompareSGT, // N
+ kCompareSGTEQ, // N
+ kCompareSLT, // N
+ kCompareSLTEQ, // N
+ kCompareUGT, // N
+ kCompareUGTEQ, // N
+ kCompareULT, // N
+ kCompareULTEQ, // N
+ kConvertFtoI, // N
+ kConvertStoF, // N
+ kConvertUtoF, // N
+ kCos, // N
+ kDivideF, // N
+ kDivideS, // N
+ kDivideU, // N
+ // Duplicates the top N stack values
+ kDup, // N
+ kFloor, // N
+ kFract, // N
kInverse2x2,
kInverse3x3,
kInverse4x4,
// A1, A2, .., B1, B2, .., T1, T2, .. -> lerp(A1, B1, T1), lerp(A2, B2, T2), ..
- VECTOR(kLerp),
- // kLoad/kLoadGlobal are followed by a byte indicating the local/global slot to load
- VECTOR(kLoad),
- VECTOR(kLoadGlobal),
- VECTOR(kLoadUniform),
- // kLoadExtended* are fallback load ops when we lack a specialization. They are followed by a
- // count byte, and get the slot to load from the top of the stack.
- kLoadExtended,
- kLoadExtendedGlobal,
- kLoadExtendedUniform,
+ kLerp, // N
+ kLoad, // N, slot
+ kLoadGlobal, // N, slot
+ kLoadUniform, // N, slot
+ // Indirect loads get the slot to load from the top of the stack
+ kLoadExtended, // N
+ kLoadExtendedGlobal, // N
+ kLoadExtendedUniform, // N
// Loads "sk_FragCoord" [X, Y, Z, 1/W]
kLoadFragCoord,
// Followed by four bytes: srcCols, srcRows, dstCols, dstRows. Consumes the src matrix from the
@@ -87,28 +83,27 @@
kMatrixToMatrix,
// Followed by three bytes: leftCols (== rightRows), leftRows, rightCols
kMatrixMultiply,
- VECTOR(kMaxF),
- VECTOR(kMaxS), // SkSL only declares signed versions of min/max
- VECTOR(kMinF),
- VECTOR(kMinS),
+ kMaxF, // N
+ kMaxS, // N -- SkSL only declares signed versions of min/max
+ kMinF, // N
+ kMinS, // N
// Masked selection: Stack is ... A1, A2, A3, B1, B2, B3, M1, M2, M3
// Result: M1 ? B1 : A1, M2 ? B2 : A2, M3 ? B3 : A3
- VECTOR(kMix),
- VECTOR_MATRIX(kNegateF),
- VECTOR(kNegateI),
- VECTOR_MATRIX(kMultiplyF),
- VECTOR(kMultiplyI),
- VECTOR(kNotB),
- kOrB,
- VECTOR_MATRIX(kPop),
- VECTOR(kPow),
+ kMix, // N
+ kNegateF, // N
+ kNegateI, // N
+ kMultiplyF, // N
+ kMultiplyI, // N
+ kNotB, // N
+ kOrB, // N
+ kPop, // N
+ kPow, // N
// Followed by a 32 bit value containing the value to push
kPushImmediate,
- // Followed by a byte indicating external value to read
- VECTOR(kReadExternal),
- VECTOR(kRemainderF),
- VECTOR(kRemainderS),
- VECTOR(kRemainderU),
+ kReadExternal, // N, slot
+ kRemainderF, // N
+ kRemainderS, // N
+ kRemainderU, // N
// Followed by a byte indicating the number of slots to reserve on the stack (for later return)
kReserve,
// Followed by a byte indicating the number of slots being returned
@@ -126,24 +121,22 @@
kShiftLeft,
kShiftRightS,
kShiftRightU,
- VECTOR(kSin),
- VECTOR(kSqrt),
- // kStore/kStoreGlobal are followed by a byte indicating the local/global slot to store
- VECTOR(kStore),
- VECTOR(kStoreGlobal),
- // Fallback stores. Followed by count byte, and get the slot to store from the top of the stack
- kStoreExtended,
- kStoreExtendedGlobal,
+ kSin, // N
+ kSqrt, // N
+ kStore, // N, slot
+ kStoreGlobal, // N, slot
+ // Indirect stores get the slot to store from the top of the stack
+ kStoreExtended, // N
+ kStoreExtendedGlobal, // N
// Followed by two count bytes (1-4), and then one byte per swizzle component (0-3). The first
// count byte provides the current vector size (the vector is the top n stack elements), and the
// second count byte provides the swizzle component count.
kSwizzle,
- VECTOR_MATRIX(kSubtractF),
- VECTOR(kSubtractI),
- VECTOR(kTan),
- // Followed by a byte indicating external value to write
- VECTOR(kWriteExternal),
- kXorB,
+ kSubtractF, // N
+ kSubtractI, // N
+ kTan, // N
+ kWriteExternal, // N, slot
+ kXorB, // N
kMaskPush,
kMaskPop,
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index c28a982..c484d7c 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -191,7 +191,6 @@
SkASSERT(fStackCount == 0);
}
this->write(ByteCodeInstruction::kReturn, 0);
- this->write8(0);
result->fLocalCount = fLocals.size();
result->fConditionCount = fMaxConditionCount;
@@ -265,128 +264,90 @@
switch (inst) {
// Unary functions/operators that don't change stack depth at all:
-#define VECTOR_UNARY_OP(base) \
- case ByteCodeInstruction::base: \
- case ByteCodeInstruction::base ## 2: \
- case ByteCodeInstruction::base ## 3: \
- case ByteCodeInstruction::base ## 4: \
- return 0;
- VECTOR_UNARY_OP(kConvertFtoI)
- VECTOR_UNARY_OP(kConvertStoF)
- VECTOR_UNARY_OP(kConvertUtoF)
+#define VEC_UNARY(inst) case ByteCodeInstruction::inst: return count - count;
- VECTOR_UNARY_OP(kATan)
- VECTOR_UNARY_OP(kCeil)
- VECTOR_UNARY_OP(kCos)
- VECTOR_UNARY_OP(kFloor)
- VECTOR_UNARY_OP(kFract)
- VECTOR_UNARY_OP(kSin)
- VECTOR_UNARY_OP(kSqrt)
- VECTOR_UNARY_OP(kTan)
+ VEC_UNARY(kConvertFtoI)
+ VEC_UNARY(kConvertStoF)
+ VEC_UNARY(kConvertUtoF)
- VECTOR_UNARY_OP(kNegateF)
- VECTOR_UNARY_OP(kNegateI)
- VECTOR_UNARY_OP(kNotB)
+ VEC_UNARY(kATan)
+ VEC_UNARY(kCeil)
+ VEC_UNARY(kCos)
+ VEC_UNARY(kFloor)
+ VEC_UNARY(kFract)
+ VEC_UNARY(kSin)
+ VEC_UNARY(kSqrt)
+ VEC_UNARY(kTan)
+
+ VEC_UNARY(kNegateF)
+ VEC_UNARY(kNegateI)
+ VEC_UNARY(kNotB)
+
+#undef VEC_UNARY
case ByteCodeInstruction::kInverse2x2:
case ByteCodeInstruction::kInverse3x3:
case ByteCodeInstruction::kInverse4x4: return 0;
- case ByteCodeInstruction::kClampIndex: return 0;
- case ByteCodeInstruction::kNegateFN: return 0;
- case ByteCodeInstruction::kShiftLeft: return 0;
+ case ByteCodeInstruction::kClampIndex: return 0;
+ case ByteCodeInstruction::kShiftLeft: return 0;
case ByteCodeInstruction::kShiftRightS: return 0;
case ByteCodeInstruction::kShiftRightU: return 0;
-#undef VECTOR_UNARY_OP
+ // Binary functions/operators that do a 2 -> 1 reduction, N times
+ case ByteCodeInstruction::kAndB: return -count;
+ case ByteCodeInstruction::kOrB: return -count;
+ case ByteCodeInstruction::kXorB: return -count;
- // Binary functions/operators that do a 2 -> 1 reduction (possibly N times)
-#define VECTOR_BINARY_OP(base) \
- case ByteCodeInstruction::base: return -1; \
- case ByteCodeInstruction::base ## 2: return -2; \
- case ByteCodeInstruction::base ## 3: return -3; \
- case ByteCodeInstruction::base ## 4: return -4;
+ case ByteCodeInstruction::kAddI: return -count;
+ case ByteCodeInstruction::kAddF: return -count;
-#define VECTOR_MATRIX_BINARY_OP(base) \
- VECTOR_BINARY_OP(base) \
- case ByteCodeInstruction::base ## N: return -count;
+ case ByteCodeInstruction::kCompareIEQ: return -count;
+ case ByteCodeInstruction::kCompareFEQ: return -count;
+ case ByteCodeInstruction::kCompareINEQ: return -count;
+ case ByteCodeInstruction::kCompareFNEQ: return -count;
+ case ByteCodeInstruction::kCompareSGT: return -count;
+ case ByteCodeInstruction::kCompareUGT: return -count;
+ case ByteCodeInstruction::kCompareFGT: return -count;
+ case ByteCodeInstruction::kCompareSGTEQ: return -count;
+ case ByteCodeInstruction::kCompareUGTEQ: return -count;
+ case ByteCodeInstruction::kCompareFGTEQ: return -count;
+ case ByteCodeInstruction::kCompareSLT: return -count;
+ case ByteCodeInstruction::kCompareULT: return -count;
+ case ByteCodeInstruction::kCompareFLT: return -count;
+ case ByteCodeInstruction::kCompareSLTEQ: return -count;
+ case ByteCodeInstruction::kCompareULTEQ: return -count;
+ case ByteCodeInstruction::kCompareFLTEQ: return -count;
- case ByteCodeInstruction::kAndB: return -1;
- case ByteCodeInstruction::kOrB: return -1;
- case ByteCodeInstruction::kXorB: return -1;
-
- VECTOR_BINARY_OP(kAddI)
- VECTOR_MATRIX_BINARY_OP(kAddF)
-
- VECTOR_BINARY_OP(kCompareIEQ)
- VECTOR_MATRIX_BINARY_OP(kCompareFEQ)
- VECTOR_BINARY_OP(kCompareINEQ)
- VECTOR_MATRIX_BINARY_OP(kCompareFNEQ)
- VECTOR_BINARY_OP(kCompareSGT)
- VECTOR_BINARY_OP(kCompareUGT)
- VECTOR_BINARY_OP(kCompareFGT)
- VECTOR_BINARY_OP(kCompareSGTEQ)
- VECTOR_BINARY_OP(kCompareUGTEQ)
- VECTOR_BINARY_OP(kCompareFGTEQ)
- VECTOR_BINARY_OP(kCompareSLT)
- VECTOR_BINARY_OP(kCompareULT)
- VECTOR_BINARY_OP(kCompareFLT)
- VECTOR_BINARY_OP(kCompareSLTEQ)
- VECTOR_BINARY_OP(kCompareULTEQ)
- VECTOR_BINARY_OP(kCompareFLTEQ)
-
- VECTOR_BINARY_OP(kDivideS)
- VECTOR_BINARY_OP(kDivideU)
- VECTOR_MATRIX_BINARY_OP(kDivideF)
- VECTOR_BINARY_OP(kMaxF)
- VECTOR_BINARY_OP(kMaxS)
- VECTOR_BINARY_OP(kMinF)
- VECTOR_BINARY_OP(kMinS)
- VECTOR_BINARY_OP(kMultiplyI)
- VECTOR_MATRIX_BINARY_OP(kMultiplyF)
- VECTOR_BINARY_OP(kPow)
- VECTOR_BINARY_OP(kRemainderF)
- VECTOR_BINARY_OP(kRemainderS)
- VECTOR_BINARY_OP(kRemainderU)
- VECTOR_BINARY_OP(kSubtractI)
- VECTOR_MATRIX_BINARY_OP(kSubtractF)
-
-#undef VECTOR_BINARY_OP
-#undef VECTOR_MATRIX_BINARY_OP
+ case ByteCodeInstruction::kDivideS: return -count;
+ case ByteCodeInstruction::kDivideU: return -count;
+ case ByteCodeInstruction::kDivideF: return -count;
+ case ByteCodeInstruction::kMaxF: return -count;
+ case ByteCodeInstruction::kMaxS: return -count;
+ case ByteCodeInstruction::kMinF: return -count;
+ case ByteCodeInstruction::kMinS: return -count;
+ case ByteCodeInstruction::kMultiplyI: return -count;
+ case ByteCodeInstruction::kMultiplyF: return -count;
+ case ByteCodeInstruction::kPow: return -count;
+ case ByteCodeInstruction::kRemainderF: return -count;
+ case ByteCodeInstruction::kRemainderS: return -count;
+ case ByteCodeInstruction::kRemainderU: return -count;
+ case ByteCodeInstruction::kSubtractI: return -count;
+ case ByteCodeInstruction::kSubtractF: return -count;
// Ops that push or load data to grow the stack:
+ case ByteCodeInstruction::kPushImmediate:
+ return 1;
+ case ByteCodeInstruction::kLoadFragCoord:
+ return 4;
+
case ByteCodeInstruction::kDup:
case ByteCodeInstruction::kLoad:
case ByteCodeInstruction::kLoadGlobal:
case ByteCodeInstruction::kLoadUniform:
case ByteCodeInstruction::kReadExternal:
- case ByteCodeInstruction::kPushImmediate:
- return 1;
-
- case ByteCodeInstruction::kDup2:
- case ByteCodeInstruction::kLoad2:
- case ByteCodeInstruction::kLoadGlobal2:
- case ByteCodeInstruction::kLoadUniform2:
- case ByteCodeInstruction::kReadExternal2:
- return 2;
-
- case ByteCodeInstruction::kDup3:
- case ByteCodeInstruction::kLoad3:
- case ByteCodeInstruction::kLoadGlobal3:
- case ByteCodeInstruction::kLoadUniform3:
- case ByteCodeInstruction::kReadExternal3:
- return 3;
-
- case ByteCodeInstruction::kDup4:
- case ByteCodeInstruction::kLoad4:
- case ByteCodeInstruction::kLoadGlobal4:
- case ByteCodeInstruction::kLoadUniform4:
- case ByteCodeInstruction::kReadExternal4:
- case ByteCodeInstruction::kLoadFragCoord:
- return 4;
-
- case ByteCodeInstruction::kDupN:
+ case ByteCodeInstruction::kReserve:
return count;
// Pushes 'count' values, minus one for the 'address' that's consumed first
@@ -397,30 +358,10 @@
// Ops that pop or store data to shrink the stack:
case ByteCodeInstruction::kPop:
+ case ByteCodeInstruction::kReturn:
case ByteCodeInstruction::kStore:
case ByteCodeInstruction::kStoreGlobal:
case ByteCodeInstruction::kWriteExternal:
- return -1;
-
- case ByteCodeInstruction::kPop2:
- case ByteCodeInstruction::kStore2:
- case ByteCodeInstruction::kStoreGlobal2:
- case ByteCodeInstruction::kWriteExternal2:
- return -2;
-
- case ByteCodeInstruction::kPop3:
- case ByteCodeInstruction::kStore3:
- case ByteCodeInstruction::kStoreGlobal3:
- case ByteCodeInstruction::kWriteExternal3:
- return -3;
-
- case ByteCodeInstruction::kPop4:
- case ByteCodeInstruction::kStore4:
- case ByteCodeInstruction::kStoreGlobal4:
- case ByteCodeInstruction::kWriteExternal4:
- return -4;
-
- case ByteCodeInstruction::kPopN:
return -count;
// Consumes 'count' values, plus one for the 'address'
@@ -432,8 +373,6 @@
case ByteCodeInstruction::kCallExternal:
case ByteCodeInstruction::kMatrixToMatrix:
case ByteCodeInstruction::kMatrixMultiply:
- case ByteCodeInstruction::kReserve:
- case ByteCodeInstruction::kReturn:
case ByteCodeInstruction::kScalarToMatrix:
case ByteCodeInstruction::kSwizzle:
return count;
@@ -446,16 +385,10 @@
case ByteCodeInstruction::kSampleMatrix: return 4 - 9;
// kMix does a 3 -> 1 reduction (A, B, M -> A -or- B) for each component
- case ByteCodeInstruction::kMix: return -2;
- case ByteCodeInstruction::kMix2: return -4;
- case ByteCodeInstruction::kMix3: return -6;
- case ByteCodeInstruction::kMix4: return -8;
+ case ByteCodeInstruction::kMix: return -(2 * count);
// kLerp works the same way (producing lerp(A, B, T) for each component)
- case ByteCodeInstruction::kLerp: return -2;
- case ByteCodeInstruction::kLerp2: return -4;
- case ByteCodeInstruction::kLerp3: return -6;
- case ByteCodeInstruction::kLerp4: return -8;
+ case ByteCodeInstruction::kLerp: return -(2 * count);
// kCall is net-zero. Max stack depth is adjusted in writeFunctionCall.
case ByteCodeInstruction::kCall: return 0;
@@ -583,7 +516,7 @@
if (offset != 0) {
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(offset);
- this->write(ByteCodeInstruction::kAddI);
+ this->write(ByteCodeInstruction::kAddI, 1);
}
return baseLoc;
} else {
@@ -617,7 +550,7 @@
if (stride != 1) {
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(stride);
- this->write(ByteCodeInstruction::kMultiplyI);
+ this->write(ByteCodeInstruction::kMultiplyI, 1);
}
}
Location baseLoc = this->getLocation(*i.fBase);
@@ -643,7 +576,7 @@
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(offset);
}
- this->write(ByteCodeInstruction::kAddI);
+ this->write(ByteCodeInstruction::kAddI, 1);
return baseLoc.makeOnStack();
}
case Expression::kSwizzle_Kind: {
@@ -655,7 +588,7 @@
if (offset != 0) {
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(offset);
- this->write(ByteCodeInstruction::kAddI);
+ this->write(ByteCodeInstruction::kAddI, 1);
}
return baseLoc;
} else {
@@ -701,33 +634,37 @@
this->write16((uint16_t)i);
fStackCount += StackUsage(i, count);
fMaxStackCount = std::max(fMaxStackCount, fStackCount);
+
+ // Most ops have an explicit count byte after them (passed here as 'count')
+ // Ops that don't have a count byte pass the default (kUnusedStackCount)
+ // There are a handful of strange ops that pass in a computed stack delta as count, but where
+ // that value should *not* be written as a count byte (it may even be negative!)
+ if (count != kUnusedStackCount) {
+ switch (i) {
+ // Odd instructions that have a non-default count, but we shouldn't write it
+ case ByteCodeInstruction::kCallExternal:
+ case ByteCodeInstruction::kMatrixToMatrix:
+ case ByteCodeInstruction::kMatrixMultiply:
+ case ByteCodeInstruction::kScalarToMatrix:
+ case ByteCodeInstruction::kSwizzle:
+ break;
+ default:
+ this->write8(count);
+ break;
+ }
+ }
}
-static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
- SkASSERT(count >= 1 && count <= 4);
- return ((ByteCodeInstruction) ((int) base + 1 - count));
-}
-
-void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruction s,
- ByteCodeInstruction u, ByteCodeInstruction f,
+void ByteCodeGenerator::writeTypedInstruction(const Type& type,
+ ByteCodeInstruction s,
+ ByteCodeInstruction u,
+ ByteCodeInstruction f,
int count) {
switch (type_category(type)) {
case TypeCategory::kBool:
- case TypeCategory::kSigned:
- this->write(vector_instruction(s, count));
- break;
- case TypeCategory::kUnsigned:
- this->write(vector_instruction(u, count));
- break;
- case TypeCategory::kFloat: {
- if (count > 4) {
- this->write((ByteCodeInstruction)((int)f + 1 - 5), count);
- this->write8(count);
- } else {
- this->write(vector_instruction(f, count));
- }
- break;
- }
+ case TypeCategory::kSigned: this->write(s, count); break;
+ case TypeCategory::kUnsigned: this->write(u, count); break;
+ case TypeCategory::kFloat: this->write(f, count); break;
default:
SkASSERT(false);
}
@@ -756,7 +693,7 @@
op = b.fOperator;
if (!lVecOrMtx && rVecOrMtx) {
for (int i = SlotCount(rType); i > 1; --i) {
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
}
}
}
@@ -765,25 +702,25 @@
switch (op) {
case Token::Kind::TK_LOGICALAND: {
SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1);
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
this->write(ByteCodeInstruction::kMaskPush);
this->write(ByteCodeInstruction::kBranchIfAllFalse);
DeferredLocation falseLocation(this);
this->writeExpression(*b.fRight);
- this->write(ByteCodeInstruction::kAndB);
+ this->write(ByteCodeInstruction::kAndB, 1);
falseLocation.set();
this->write(ByteCodeInstruction::kMaskPop);
return false;
}
case Token::Kind::TK_LOGICALOR: {
SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1);
- this->write(ByteCodeInstruction::kDup);
- this->write(ByteCodeInstruction::kNotB);
+ this->write(ByteCodeInstruction::kDup, 1);
+ this->write(ByteCodeInstruction::kNotB, 1);
this->write(ByteCodeInstruction::kMaskPush);
this->write(ByteCodeInstruction::kBranchIfAllFalse);
DeferredLocation falseLocation(this);
this->writeExpression(*b.fRight);
- this->write(ByteCodeInstruction::kOrB);
+ this->write(ByteCodeInstruction::kOrB, 1);
falseLocation.set();
this->write(ByteCodeInstruction::kMaskPop);
return false;
@@ -819,7 +756,7 @@
this->writeExpression(*b.fRight);
if (lVecOrMtx && !rVecOrMtx) {
for (int i = SlotCount(lType); i > 1; --i) {
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
}
}
// Special case for M*V, V*M, M*M (but not V*V!)
@@ -849,7 +786,7 @@
count);
// Collapse to a single bool
for (int i = count; i > 1; --i) {
- this->write(ByteCodeInstruction::kAndB);
+ this->write(ByteCodeInstruction::kAndB, 1);
}
break;
case Token::Kind::TK_GT:
@@ -889,7 +826,7 @@
count);
// Collapse to a single bool
for (int i = count; i > 1; --i) {
- this->write(ByteCodeInstruction::kOrB);
+ this->write(ByteCodeInstruction::kOrB, 1);
}
break;
case Token::Kind::TK_PERCENT:
@@ -918,24 +855,21 @@
break;
case Token::Kind::TK_LOGICALXOR:
- SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1);
- this->write(ByteCodeInstruction::kXorB);
+ SkASSERT(tc == SkSL::TypeCategory::kBool);
+ this->write(ByteCodeInstruction::kXorB, count);
break;
case Token::Kind::TK_BITWISEAND:
- SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned ||
- tc == SkSL::TypeCategory::kUnsigned));
- this->write(ByteCodeInstruction::kAndB);
+ SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned);
+ this->write(ByteCodeInstruction::kAndB, count);
break;
case Token::Kind::TK_BITWISEOR:
- SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned ||
- tc == SkSL::TypeCategory::kUnsigned));
- this->write(ByteCodeInstruction::kOrB);
+ SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned);
+ this->write(ByteCodeInstruction::kOrB, count);
break;
case Token::Kind::TK_BITWISEXOR:
- SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned ||
- tc == SkSL::TypeCategory::kUnsigned));
- this->write(ByteCodeInstruction::kXorB);
+ SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned);
+ this->write(ByteCodeInstruction::kXorB, count);
break;
default:
@@ -972,13 +906,13 @@
if (inCategory == TypeCategory::kFloat) {
SkASSERT(outCategory == TypeCategory::kSigned ||
outCategory == TypeCategory::kUnsigned);
- this->write(vector_instruction(ByteCodeInstruction::kConvertFtoI, outCount));
+ this->write(ByteCodeInstruction::kConvertFtoI, outCount);
} else if (outCategory == TypeCategory::kFloat) {
if (inCategory == TypeCategory::kSigned) {
- this->write(vector_instruction(ByteCodeInstruction::kConvertStoF, outCount));
+ this->write(ByteCodeInstruction::kConvertStoF, outCount);
} else {
SkASSERT(inCategory == TypeCategory::kUnsigned);
- this->write(vector_instruction(ByteCodeInstruction::kConvertUtoF, outCount));
+ this->write(ByteCodeInstruction::kConvertUtoF, outCount);
}
} else {
SkASSERT(false);
@@ -1000,7 +934,7 @@
} else {
SkASSERT(outType.kind() == Type::kVector_Kind);
for (; inCount != outCount; ++inCount) {
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
}
}
}
@@ -1025,7 +959,7 @@
void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e) {
int count = SlotCount(e.fValue->type());
- this->write(vector_instruction(ByteCodeInstruction::kReadExternal, count));
+ this->write(ByteCodeInstruction::kReadExternal, count);
int index = fOutput->fExternalValues.size();
fOutput->fExternalValues.push_back(e.fValue);
SkASSERT(index <= 255);
@@ -1056,18 +990,12 @@
ByteCodeInstruction::kLoadExtendedGlobal,
ByteCodeInstruction::kLoadExtendedUniform),
count);
- this->write8(count);
} else {
- while (count) {
- int loadCount = std::min(count, 4);
- this->write(vector_instruction(location.selectLoad(ByteCodeInstruction::kLoad,
- ByteCodeInstruction::kLoadGlobal,
- ByteCodeInstruction::kLoadUniform),
- loadCount));
- this->write8(location.fSlot);
- count -= loadCount;
- location.fSlot += loadCount;
- }
+ this->write(location.selectLoad(ByteCodeInstruction::kLoad,
+ ByteCodeInstruction::kLoadGlobal,
+ ByteCodeInstruction::kLoadUniform),
+ count);
+ this->write8(location.fSlot);
}
}
@@ -1102,7 +1030,7 @@
auto dupSmallerType = [count, this](int smallCount) {
SkASSERT(smallCount == 1 || smallCount == count);
for (int i = smallCount; i < count; ++i) {
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
}
};
@@ -1179,33 +1107,33 @@
switch (intrin.special) {
case SpecialIntrinsic::kAll: {
for (int i = count-1; i --> 0;) {
- this->write(ByteCodeInstruction::kAndB);
+ this->write(ByteCodeInstruction::kAndB, 1);
}
} break;
case SpecialIntrinsic::kAny: {
for (int i = count-1; i --> 0;) {
- this->write(ByteCodeInstruction::kOrB);
+ this->write(ByteCodeInstruction::kOrB, 1);
}
} break;
case SpecialIntrinsic::kDot: {
SkASSERT(c.fArguments.size() == 2);
SkASSERT(count == SlotCount(c.fArguments[1]->fType));
- this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count));
+ this->write(ByteCodeInstruction::kMultiplyF, count);
for (int i = count-1; i --> 0;) {
- this->write(ByteCodeInstruction::kAddF);
+ this->write(ByteCodeInstruction::kAddF, 1);
}
} break;
case SpecialIntrinsic::kLength: {
SkASSERT(c.fArguments.size() == 1);
- this->write(vector_instruction(ByteCodeInstruction::kDup , count));
- this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count));
+ this->write(ByteCodeInstruction::kDup, count);
+ this->write(ByteCodeInstruction::kMultiplyF, count);
for (int i = count-1; i --> 0;) {
- this->write(ByteCodeInstruction::kAddF);
+ this->write(ByteCodeInstruction::kAddF, 1);
}
- this->write(ByteCodeInstruction::kSqrt);
+ this->write(ByteCodeInstruction::kSqrt, 1);
} break;
case SpecialIntrinsic::kMax:
@@ -1237,25 +1165,25 @@
if (is_generic_type(&c.fArguments[2]->fType, fContext.fGenBType_Type.get())) {
// mix(genType, genType, genBoolType)
SkASSERT(selectorCount == count);
- this->write(vector_instruction(ByteCodeInstruction::kMix, count));
+ this->write(ByteCodeInstruction::kMix, count);
} else {
// mix(genType, genType, genType) or mix(genType, genType, float)
dupSmallerType(selectorCount);
- this->write(vector_instruction(ByteCodeInstruction::kLerp, count));
+ this->write(ByteCodeInstruction::kLerp, count);
}
} break;
case SpecialIntrinsic::kNormalize: {
SkASSERT(c.fArguments.size() == 1);
- this->write(vector_instruction(ByteCodeInstruction::kDup , count));
- this->write(vector_instruction(ByteCodeInstruction::kDup , count));
- this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count));
+ this->write(ByteCodeInstruction::kDup, count);
+ this->write(ByteCodeInstruction::kDup, count);
+ this->write(ByteCodeInstruction::kMultiplyF, count);
for (int i = count-1; i --> 0;) {
- this->write(ByteCodeInstruction::kAddF);
+ this->write(ByteCodeInstruction::kAddF, 1);
}
- this->write(ByteCodeInstruction::kSqrt);
+ this->write(ByteCodeInstruction::kSqrt, 1);
dupSmallerType(1);
- this->write(vector_instruction(ByteCodeInstruction::kDivideF, count));
+ this->write(ByteCodeInstruction::kDivideF, count);
} break;
default:
@@ -1277,8 +1205,11 @@
}
default:
- this->writeTypedInstruction(c.fArguments[0]->fType, intrin.inst_s, intrin.inst_u,
- intrin.inst_f, count);
+ this->writeTypedInstruction(c.fArguments[0]->fType,
+ intrin.inst_s,
+ intrin.inst_u,
+ intrin.inst_f,
+ count);
break;
}
}
@@ -1311,7 +1242,6 @@
// We may need to deal with out parameters, so the sequence is tricky
if (int returnCount = SlotCount(f.fType)) {
this->write(ByteCodeInstruction::kReserve, returnCount);
- this->write8(returnCount);
}
int argCount = f.fArguments.size();
@@ -1343,11 +1273,8 @@
// counts for all parameters that aren't out-params, so we can pop them in one big chunk.
int popCount = 0;
auto pop = [&]() {
- if (popCount > 4) {
- this->write(ByteCodeInstruction::kPopN, popCount);
- this->write8(popCount);
- } else if (popCount > 0) {
- this->write(vector_instruction(ByteCodeInstruction::kPop, popCount));
+ if (popCount > 0) {
+ this->write(ByteCodeInstruction::kPop, popCount);
}
popCount = 0;
};
@@ -1419,7 +1346,7 @@
(p.fOperator == Token::Kind::TK_BITWISENOT && (tc == TypeCategory::kSigned ||
tc == TypeCategory::kUnsigned)));
this->writeExpression(*p.fOperand);
- this->write(ByteCodeInstruction::kNotB);
+ this->write(ByteCodeInstruction::kNotB, 1);
break;
}
default:
@@ -1437,7 +1364,7 @@
lvalue->load();
// If we're not supposed to discard the result, then make a copy *before* the +/-
if (!discard) {
- this->write(ByteCodeInstruction::kDup);
+ this->write(ByteCodeInstruction::kDup, 1);
}
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(type_category(p.fType) == TypeCategory::kFloat ? float_to_bits(1.0f) : 1);
@@ -1491,7 +1418,6 @@
this->write(ByteCodeInstruction::kMaskNegate);
this->writeExpression(*t.fIfFalse);
this->write(ByteCodeInstruction::kMaskBlend, count);
- this->write8(count);
}
void ByteCodeGenerator::writeExpression(const Expression& e, bool discard) {
@@ -1548,11 +1474,8 @@
}
if (discard) {
int count = SlotCount(e.fType);
- if (count > 4) {
- this->write(ByteCodeInstruction::kPopN, count);
- this->write8(count);
- } else if (count != 0) {
- this->write(vector_instruction(ByteCodeInstruction::kPop, count));
+ if (count > 0) {
+ this->write(ByteCodeInstruction::kPop, count);
}
discard = false;
}
@@ -1566,15 +1489,15 @@
, fIndex(index) {}
void load() override {
- fGenerator.write(vector_instruction(ByteCodeInstruction::kReadExternal, fCount));
+ fGenerator.write(ByteCodeInstruction::kReadExternal, fCount);
fGenerator.write8(fIndex);
}
void store(bool discard) override {
if (!discard) {
- fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, fCount));
+ fGenerator.write(ByteCodeInstruction::kDup, fCount);
}
- fGenerator.write(vector_instruction(ByteCodeInstruction::kWriteExternal, fCount));
+ fGenerator.write(ByteCodeInstruction::kWriteExternal, fCount);
fGenerator.write8(fIndex);
}
@@ -1582,7 +1505,6 @@
typedef LValue INHERITED;
int fCount;
-
int fIndex;
};
@@ -1599,7 +1521,7 @@
void store(bool discard) override {
int count = fSwizzle.fComponents.size();
if (!discard) {
- fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
+ fGenerator.write(ByteCodeInstruction::kDup, count);
}
// We already have the correct number of values on the stack, thanks to type checking.
// The algorithm: Walk down the values on the stack, doing 'count' single-element stores.
@@ -1615,16 +1537,16 @@
ByteCodeGenerator::Location location = fGenerator.getLocation(*fSwizzle.fBase);
if (!location.isOnStack()) {
fGenerator.write(location.selectStore(ByteCodeInstruction::kStore,
- ByteCodeInstruction::kStoreGlobal));
+ ByteCodeInstruction::kStoreGlobal),
+ 1);
fGenerator.write8(location.fSlot + fSwizzle.fComponents[i]);
} else {
fGenerator.write(ByteCodeInstruction::kPushImmediate);
fGenerator.write32(fSwizzle.fComponents[i]);
- fGenerator.write(ByteCodeInstruction::kAddI);
+ fGenerator.write(ByteCodeInstruction::kAddI, 1);
fGenerator.write(location.selectStore(ByteCodeInstruction::kStoreExtended,
ByteCodeInstruction::kStoreExtendedGlobal),
1);
- fGenerator.write8(1);
}
}
}
@@ -1648,28 +1570,17 @@
void store(bool discard) override {
int count = ByteCodeGenerator::SlotCount(fExpression.fType);
if (!discard) {
- if (count > 4) {
- fGenerator.write(ByteCodeInstruction::kDupN, count);
- fGenerator.write8(count);
- } else {
- fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
- }
+ fGenerator.write(ByteCodeInstruction::kDup, count);
}
ByteCodeGenerator::Location location = fGenerator.getLocation(fExpression);
- if (location.isOnStack() || count > 4) {
- if (!location.isOnStack()) {
- fGenerator.write(ByteCodeInstruction::kPushImmediate);
- fGenerator.write32(location.fSlot);
- }
+ if (location.isOnStack()) {
fGenerator.write(location.selectStore(ByteCodeInstruction::kStoreExtended,
ByteCodeInstruction::kStoreExtendedGlobal),
count);
- fGenerator.write8(count);
} else {
- fGenerator.write(
- vector_instruction(location.selectStore(ByteCodeInstruction::kStore,
- ByteCodeInstruction::kStoreGlobal),
- count));
+ fGenerator.write(location.selectStore(ByteCodeInstruction::kStore,
+ ByteCodeInstruction::kStoreGlobal),
+ count);
fGenerator.write8(location.fSlot);
}
}
@@ -1811,8 +1722,7 @@
// we account for those in writeFunction().
// This is all fine because we don't allow conditional returns, so we only return once anyway.
- this->write(ByteCodeInstruction::kReturn, -count);
- this->write8(count);
+ this->write(ByteCodeInstruction::kReturn, count);
}
void ByteCodeGenerator::writeSwitchStatement(const SwitchStatement& r) {
@@ -1828,15 +1738,8 @@
if (decl.fValue) {
this->writeExpression(*decl.fValue);
int count = SlotCount(decl.fValue->fType);
- if (count > 4) {
- this->write(ByteCodeInstruction::kPushImmediate);
- this->write32(location.fSlot);
- this->write(ByteCodeInstruction::kStoreExtended, count);
- this->write8(count);
- } else {
- this->write(vector_instruction(ByteCodeInstruction::kStore, count));
- this->write8(location.fSlot);
- }
+ this->write(ByteCodeInstruction::kStore, count);
+ this->write8(location.fSlot);
}
}
}