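SkSL interpreter performance improvements

Write 16- and 32-bit bytecode operands least-significant byte first, and pad
the instruction stream with the new kNop instruction so that each such operand
starts at an offset that is a multiple of its size. The interpreter's read16()
and read32() now fetch the operand with a single load instead of assembling it
byte by byte. Also fold Interpreter::next() into the loop in run() and rename
nextVector() to vectorOp().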
Bug: skia:
Change-Id: Ib7aff4a5b159d8ec3b4b8bb96cee62ed5d277a04
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/211641
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h
index 92a2cca..327b20e 100644
--- a/src/sksl/SkSLByteCode.h
+++ b/src/sksl/SkSLByteCode.h
@@ -14,6 +14,7 @@
enum class ByteCodeInstruction : uint8_t {
kInvalid,
+ kNop,
// B = bool, F = float, I = int, S = signed, U = unsigned
kAddF,
kAddI,
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index f918ef6..5bf18aa 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -145,20 +145,28 @@
}
}
+void ByteCodeGenerator::align(int divisor, int remainder) {
+ while ((int) fCode->size() % divisor != remainder) {
+ this->write(ByteCodeInstruction::kNop);
+ }
+}
+
void ByteCodeGenerator::write8(uint8_t b) {
fCode->push_back(b);
}
void ByteCodeGenerator::write16(uint16_t i) {
+ SkASSERT(fCode->size() % 2 == 0);
+ this->write8(i >> 0);
this->write8(i >> 8);
- this->write8(i);
}
void ByteCodeGenerator::write32(uint32_t i) {
- this->write8((i >> 24) & 0xFF);
- this->write8((i >> 16) & 0xFF);
- this->write8((i >> 8) & 0xFF);
+ SkASSERT(fCode->size() % 4 == 0);
this->write8((i >> 0) & 0xFF);
+ this->write8((i >> 8) & 0xFF);
+ this->write8((i >> 16) & 0xFF);
+ this->write8((i >> 24) & 0xFF);
}
void ByteCodeGenerator::write(ByteCodeInstruction i) {
@@ -286,6 +294,7 @@
}
void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(1);
}
@@ -333,6 +342,7 @@
}
void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) {
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
union { float f; uint32_t u; } pun = { (float) f.fValue };
this->write32(pun.u);
@@ -349,6 +359,7 @@
}
void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) {
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(i.fValue);
}
@@ -364,6 +375,7 @@
case Token::Kind::MINUSMINUS: {
std::unique_ptr<LValue> lvalue = this->getLValue(*p.fOperand);
lvalue->load();
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(1);
if (p.fOperator == Token::Kind::PLUSPLUS) {
@@ -403,6 +415,7 @@
case Expression::kVariableReference_Kind: {
const Variable& var = ((VariableReference&) *s.fBase).fVariable;
int location = this->getLocation(var);
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(location);
this->write(ByteCodeInstruction::kLoadSwizzle);
@@ -430,6 +443,7 @@
SkASSERT(location <= 255);
this->write8(location);
} else {
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(this->getLocation(v.fVariable));
int count = slot_count(v.fType);
@@ -499,6 +513,7 @@
void ByteCodeGenerator::writeTarget(const Expression& e) {
switch (e.fKind) {
case Expression::kVariableReference_Kind:
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(this->getLocation(((VariableReference&) e).fVariable));
break;
@@ -547,6 +562,7 @@
: INHERITED(*generator)
, fCount(slot_count(var.fType))
, fIsGlobal(var.fStorage == Variable::kGlobal_Storage) {
+ fGenerator.align(4, 3);
fGenerator.write(ByteCodeInstruction::kPushImmediate);
fGenerator.write32(generator->getLocation(var));
}
@@ -617,11 +633,13 @@
}
void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fBreakTargets.top().emplace_back(this);
}
void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fContinueTargets.top().emplace_back(this);
}
@@ -633,6 +651,7 @@
this->writeStatement(*d.fStatement);
this->setContinueTargets();
this->writeExpression(*d.fTest);
+ this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
this->write16(start);
this->setBreakTargets();
@@ -648,6 +667,7 @@
if (f.fTest) {
this->writeExpression(*f.fTest);
this->write(ByteCodeInstruction::kNot);
+ this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*f.fStatement);
@@ -657,6 +677,7 @@
this->write(ByteCodeInstruction::kPop);
this->write8(slot_count(f.fNext->fType));
}
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();
@@ -668,6 +689,7 @@
this->write(ByteCodeInstruction::kPop);
this->write8(slot_count(f.fNext->fType));
}
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
}
@@ -677,9 +699,11 @@
void ByteCodeGenerator::writeIfStatement(const IfStatement& i) {
this->writeExpression(*i.fTest);
this->write(ByteCodeInstruction::kNot);
+ this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation elseLocation(this);
this->writeStatement(*i.fIfTrue);
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
DeferredLocation endLocation(this);
elseLocation.set();
@@ -707,6 +731,7 @@
// has been allocated
int location = getLocation(*decl.fVar);
if (decl.fValue) {
+ this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(location);
this->writeExpression(*decl.fValue);
@@ -726,10 +751,12 @@
size_t start = fCode->size();
this->writeExpression(*w.fTest);
this->write(ByteCodeInstruction::kNot);
+ this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*w.fStatement);
this->setContinueTargets();
+ this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();
diff --git a/src/sksl/SkSLByteCodeGenerator.h b/src/sksl/SkSLByteCodeGenerator.h
index d89d578..497d462 100644
--- a/src/sksl/SkSLByteCodeGenerator.h
+++ b/src/sksl/SkSLByteCodeGenerator.h
@@ -83,6 +83,8 @@
bool generateCode() override;
+ void align(int divisor, int remainder);
+
void write8(uint8_t b);
void write16(uint16_t b);
@@ -122,8 +124,8 @@
void set() {
int target = fGenerator.fCode->size();
SkASSERT(target <= 65535);
- (*fGenerator.fCode)[fOffset] = target >> 8;
- (*fGenerator.fCode)[fOffset + 1] = target;
+ (*fGenerator.fCode)[fOffset] = target;
+ (*fGenerator.fCode)[fOffset + 1] = target >> 8;
#ifdef SK_DEBUG
fSet = true;
#endif
diff --git a/src/sksl/SkSLInterpreter.cpp b/src/sksl/SkSLInterpreter.cpp
index 6b17e59..d4575b3 100644
--- a/src/sksl/SkSLInterpreter.cpp
+++ b/src/sksl/SkSLInterpreter.cpp
@@ -75,17 +75,15 @@
}
uint16_t Interpreter::read16() {
- uint16_t result = (fCurrentFunction->fCode[fIP ] << 8) +
- fCurrentFunction->fCode[fIP + 1];
+ SkASSERT(fIP % 2 == 0);
+ uint16_t result = *(uint16_t*) &fCurrentFunction->fCode[fIP];
fIP += 2;
return result;
}
uint32_t Interpreter::read32() {
- uint32_t result = (fCurrentFunction->fCode[fIP] << 24) +
- (fCurrentFunction->fCode[fIP + 1] << 16) +
- (fCurrentFunction->fCode[fIP + 2] << 8) +
- fCurrentFunction->fCode[fIP + 3];
+ SkASSERT(fIP % 4 == 0);
+ uint32_t result = *(uint32_t*) &fCurrentFunction->fCode[fIP];
fIP += 4;
return result;
}
@@ -215,180 +213,6 @@
break; \
}
-void Interpreter::next() {
-#ifdef TRACE
- printf("at %d\n", fIP);
-#endif
- ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();
- switch (inst) {
- BINARY_OP(kAddI, int32_t, fSigned, +)
- BINARY_OP(kAddF, float, fFloat, +)
- case ByteCodeInstruction::kBranch:
- fIP = this->read16();
- break;
- BINARY_OP(kCompareIEQ, int32_t, fSigned, ==)
- BINARY_OP(kCompareFEQ, float, fFloat, ==)
- BINARY_OP(kCompareINEQ, int32_t, fSigned, !=)
- BINARY_OP(kCompareFNEQ, float, fFloat, !=)
- BINARY_OP(kCompareSGT, int32_t, fSigned, >)
- BINARY_OP(kCompareUGT, uint32_t, fUnsigned, >)
- BINARY_OP(kCompareFGT, float, fFloat, >)
- BINARY_OP(kCompareSGTEQ, int32_t, fSigned, >=)
- BINARY_OP(kCompareUGTEQ, uint32_t, fUnsigned, >=)
- BINARY_OP(kCompareFGTEQ, float, fFloat, >=)
- BINARY_OP(kCompareSLT, int32_t, fSigned, <)
- BINARY_OP(kCompareULT, uint32_t, fUnsigned, <)
- BINARY_OP(kCompareFLT, float, fFloat, <)
- BINARY_OP(kCompareSLTEQ, int32_t, fSigned, <=)
- BINARY_OP(kCompareULTEQ, uint32_t, fUnsigned, <=)
- BINARY_OP(kCompareFLTEQ, float, fFloat, <=)
- case ByteCodeInstruction::kConditionalBranch: {
- int target = this->read16();
- if (this->pop().fBool) {
- fIP = target;
- }
- break;
- }
- case ByteCodeInstruction::kDebugPrint: {
- Value v = this->pop();
- printf("Debug: %d(int), %d(uint), %f(float)\n", v.fSigned, v.fUnsigned, v.fFloat);
- break;
- }
- BINARY_OP(kDivideS, int32_t, fSigned, /)
- BINARY_OP(kDivideU, uint32_t, fUnsigned, /)
- BINARY_OP(kDivideF, float, fFloat, /)
- case ByteCodeInstruction::kDup:
- this->push(fStack.back());
- break;
- case ByteCodeInstruction::kDupDown: {
- int count = this->read8();
- for (int i = 0; i < count; ++i) {
- fStack.insert(fStack.end() - i - count - 1, fStack[fStack.size() - i - 1]);
- }
- break;
- }
- case ByteCodeInstruction::kFloatToInt: {
- Value& top = fStack.back();
- top.fSigned = (int) top.fFloat;
- break;
- }
- case ByteCodeInstruction::kSignedToFloat: {
- Value& top = fStack.back();
- top.fFloat = (float) top.fSigned;
- break;
- }
- case ByteCodeInstruction::kUnsignedToFloat: {
- Value& top = fStack.back();
- top.fFloat = (float) top.fUnsigned;
- break;
- }
- case ByteCodeInstruction::kLoad: {
- int target = this->pop().fSigned;
- SkASSERT(target < (int) fStack.size());
- this->push(fStack[target]);
- break;
- }
- case ByteCodeInstruction::kLoadGlobal: {
- int target = this->read8();
- SkASSERT(target < (int) fGlobals.size());
- this->push(fGlobals[target]);
- break;
- }
- case ByteCodeInstruction::kLoadSwizzle: {
- Value target = this->pop();
- int count = read8();
- for (int i = 0; i < count; ++i) {
- SkASSERT(target.fSigned + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
- this->push(fStack[target.fSigned + fCurrentFunction->fCode[fIP + i]]);
- }
- fIP += count;
- break;
- }
- BINARY_OP(kMultiplyS, int32_t, fSigned, *)
- BINARY_OP(kMultiplyU, uint32_t, fUnsigned, *)
- BINARY_OP(kMultiplyF, float, fFloat, *)
- case ByteCodeInstruction::kNot: {
- Value& top = fStack.back();
- top.fBool = !top.fBool;
- break;
- }
- case ByteCodeInstruction::kNegateF: {
- Value& top = fStack.back();
- top.fFloat = -top.fFloat;
- break;
- }
- case ByteCodeInstruction::kNegateS: {
- Value& top = fStack.back();
- top.fSigned = -top.fSigned;
- break;
- }
- case ByteCodeInstruction::kPop:
- for (int i = read8(); i > 0; --i) {
- this->pop();
- }
- break;
- case ByteCodeInstruction::kPushImmediate:
- this->push(Value((int) read32()));
- break;
- BINARY_OP(kRemainderS, int32_t, fSigned, %)
- BINARY_OP(kRemainderU, uint32_t, fUnsigned, %)
- case ByteCodeInstruction::kReturn: {
- int count = this->read8();
- for (int i = 0; i < count; ++i) {
- fStack[i] = fStack[fStack.size() - count + i];
- }
- fIP = (int) fCurrentFunction->fCode.size();
- break;
- }
- case ByteCodeInstruction::kStore: {
- Value value = this->pop();
- int target = this->pop().fSigned;
- SkASSERT(target < (int) fStack.size());
- fStack[target] = value;
- break;
- }
- case ByteCodeInstruction::kStoreGlobal: {
- Value value = this->pop();
- int target = this->pop().fSigned;
- SkASSERT(target < (int) fGlobals.size());
- fGlobals[target] = value;
- break;
- }
- case ByteCodeInstruction::kStoreSwizzle: {
- int count = read8();
- int target = fStack[fStack.size() - count - 1].fSigned;
- for (int i = count - 1; i >= 0; --i) {
- SkASSERT(target + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
- fStack[target + fCurrentFunction->fCode[fIP + i]] = this->pop();
- }
- this->pop();
- fIP += count;
- break;
- }
- BINARY_OP(kSubtractI, int32_t, fSigned, -)
- BINARY_OP(kSubtractF, float, fFloat, -)
- case ByteCodeInstruction::kSwizzle: {
- Value vec[4];
- for (int i = this->read8() - 1; i >= 0; --i) {
- vec[i] = this->pop();
- }
- for (int i = this->read8() - 1; i >= 0; --i) {
- this->push(vec[this->read8()]);
- }
- break;
- }
- case ByteCodeInstruction::kVector:
- this->nextVector(this->read8());
- break;
- default:
- printf("unsupported instruction %d\n", (int) inst);
- SkASSERT(false);
- }
-#ifdef TRACE
- this->dumpStack();
-#endif
-}
-
static constexpr int VECTOR_MAX = 16;
#define VECTOR_BINARY_OP(inst, type, field, op) \
@@ -406,7 +230,7 @@
break; \
}
-void Interpreter::nextVector(int count) {
+void Interpreter::vectorOp(int count) {
ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();
switch (inst) {
VECTOR_BINARY_OP(kAddI, int32_t, fSigned, +)
@@ -490,8 +314,6 @@
}
VECTOR_BINARY_OP(kSubtractI, int32_t, fSigned, -)
VECTOR_BINARY_OP(kSubtractF, float, fFloat, -)
- case ByteCodeInstruction::kVector:
- this->nextVector(this->read8());
default:
printf("unsupported instruction %d\n", (int) inst);
SkASSERT(false);
@@ -500,7 +322,179 @@
void Interpreter::run() {
while (fIP < (int) fCurrentFunction->fCode.size()) {
- next();
+#ifdef TRACE
+ printf("at %d\n", fIP);
+#endif
+ ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();
+ switch (inst) {
+ BINARY_OP(kAddI, int32_t, fSigned, +)
+ BINARY_OP(kAddF, float, fFloat, +)
+ case ByteCodeInstruction::kBranch:
+ fIP = this->read16();
+ break;
+ BINARY_OP(kCompareIEQ, int32_t, fSigned, ==)
+ BINARY_OP(kCompareFEQ, float, fFloat, ==)
+ BINARY_OP(kCompareINEQ, int32_t, fSigned, !=)
+ BINARY_OP(kCompareFNEQ, float, fFloat, !=)
+ BINARY_OP(kCompareSGT, int32_t, fSigned, >)
+ BINARY_OP(kCompareUGT, uint32_t, fUnsigned, >)
+ BINARY_OP(kCompareFGT, float, fFloat, >)
+ BINARY_OP(kCompareSGTEQ, int32_t, fSigned, >=)
+ BINARY_OP(kCompareUGTEQ, uint32_t, fUnsigned, >=)
+ BINARY_OP(kCompareFGTEQ, float, fFloat, >=)
+ BINARY_OP(kCompareSLT, int32_t, fSigned, <)
+ BINARY_OP(kCompareULT, uint32_t, fUnsigned, <)
+ BINARY_OP(kCompareFLT, float, fFloat, <)
+ BINARY_OP(kCompareSLTEQ, int32_t, fSigned, <=)
+ BINARY_OP(kCompareULTEQ, uint32_t, fUnsigned, <=)
+ BINARY_OP(kCompareFLTEQ, float, fFloat, <=)
+ case ByteCodeInstruction::kConditionalBranch: {
+ int target = this->read16();
+ if (this->pop().fBool) {
+ fIP = target;
+ }
+ break;
+ }
+ case ByteCodeInstruction::kDebugPrint: {
+ Value v = this->pop();
+ printf("Debug: %d(int), %d(uint), %f(float)\n", v.fSigned, v.fUnsigned, v.fFloat);
+ break;
+ }
+ BINARY_OP(kDivideS, int32_t, fSigned, /)
+ BINARY_OP(kDivideU, uint32_t, fUnsigned, /)
+ BINARY_OP(kDivideF, float, fFloat, /)
+ case ByteCodeInstruction::kDup:
+ this->push(fStack.back());
+ break;
+ case ByteCodeInstruction::kDupDown: {
+ int count = this->read8();
+ for (int i = 0; i < count; ++i) {
+ fStack.insert(fStack.end() - i - count - 1, fStack[fStack.size() - i - 1]);
+ }
+ break;
+ }
+ case ByteCodeInstruction::kFloatToInt: {
+ Value& top = fStack.back();
+ top.fSigned = (int) top.fFloat;
+ break;
+ }
+ case ByteCodeInstruction::kSignedToFloat: {
+ Value& top = fStack.back();
+ top.fFloat = (float) top.fSigned;
+ break;
+ }
+ case ByteCodeInstruction::kUnsignedToFloat: {
+ Value& top = fStack.back();
+ top.fFloat = (float) top.fUnsigned;
+ break;
+ }
+ case ByteCodeInstruction::kLoad: {
+ int target = this->pop().fSigned;
+ SkASSERT(target < (int) fStack.size());
+ this->push(fStack[target]);
+ break;
+ }
+ case ByteCodeInstruction::kLoadGlobal: {
+ int target = this->read8();
+ SkASSERT(target < (int) fGlobals.size());
+ this->push(fGlobals[target]);
+ break;
+ }
+ case ByteCodeInstruction::kLoadSwizzle: {
+ Value target = this->pop();
+ int count = read8();
+ for (int i = 0; i < count; ++i) {
+ SkASSERT(target.fSigned + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
+ this->push(fStack[target.fSigned + fCurrentFunction->fCode[fIP + i]]);
+ }
+ fIP += count;
+ break;
+ }
+ BINARY_OP(kMultiplyS, int32_t, fSigned, *)
+ BINARY_OP(kMultiplyU, uint32_t, fUnsigned, *)
+ BINARY_OP(kMultiplyF, float, fFloat, *)
+ case ByteCodeInstruction::kNot: {
+ Value& top = fStack.back();
+ top.fBool = !top.fBool;
+ break;
+ }
+ case ByteCodeInstruction::kNegateF: {
+ Value& top = fStack.back();
+ top.fFloat = -top.fFloat;
+ break;
+ }
+ case ByteCodeInstruction::kNegateS: {
+ Value& top = fStack.back();
+ top.fSigned = -top.fSigned;
+ break;
+ }
+ case ByteCodeInstruction::kNop:
+ break;
+ case ByteCodeInstruction::kPop:
+ for (int i = read8(); i > 0; --i) {
+ this->pop();
+ }
+ break;
+ case ByteCodeInstruction::kPushImmediate:
+ this->push(Value((int) read32()));
+ break;
+ BINARY_OP(kRemainderS, int32_t, fSigned, %)
+ BINARY_OP(kRemainderU, uint32_t, fUnsigned, %)
+ case ByteCodeInstruction::kReturn: {
+ int count = this->read8();
+ for (int i = 0; i < count; ++i) {
+ fStack[i] = fStack[fStack.size() - count + i];
+ }
+ fIP = (int) fCurrentFunction->fCode.size();
+ break;
+ }
+ case ByteCodeInstruction::kStore: {
+ Value value = this->pop();
+ int target = this->pop().fSigned;
+ SkASSERT(target < (int) fStack.size());
+ fStack[target] = value;
+ break;
+ }
+ case ByteCodeInstruction::kStoreGlobal: {
+ Value value = this->pop();
+ int target = this->pop().fSigned;
+ SkASSERT(target < (int) fGlobals.size());
+ fGlobals[target] = value;
+ break;
+ }
+ case ByteCodeInstruction::kStoreSwizzle: {
+ int count = read8();
+ int target = fStack[fStack.size() - count - 1].fSigned;
+ for (int i = count - 1; i >= 0; --i) {
+ SkASSERT(target + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
+ fStack[target + fCurrentFunction->fCode[fIP + i]] = this->pop();
+ }
+ this->pop();
+ fIP += count;
+ break;
+ }
+ BINARY_OP(kSubtractI, int32_t, fSigned, -)
+ BINARY_OP(kSubtractF, float, fFloat, -)
+ case ByteCodeInstruction::kSwizzle: {
+ Value vec[4];
+ for (int i = this->read8() - 1; i >= 0; --i) {
+ vec[i] = this->pop();
+ }
+ for (int i = this->read8() - 1; i >= 0; --i) {
+ this->push(vec[this->read8()]);
+ }
+ break;
+ }
+ case ByteCodeInstruction::kVector:
+ this->vectorOp(this->read8());
+ break;
+ default:
+ printf("unsupported instruction %d\n", (int) inst);
+ SkASSERT(false);
+ }
+#ifdef TRACE
+ this->dumpStack();
+#endif
}
}
diff --git a/src/sksl/SkSLInterpreter.h b/src/sksl/SkSLInterpreter.h
index b91be2a..5324fd9 100644
--- a/src/sksl/SkSLInterpreter.h
+++ b/src/sksl/SkSLInterpreter.h
@@ -70,9 +70,7 @@
uint32_t read32();
- void next();
-
- void nextVector(int count);
+ inline void vectorOp(int count);
void run();