byte align everything in SkSLInterpreter
It's nicer to write code without having to think about alignment,
and this appears to be faster too:
$ ninja -C out nanobench && out/nanobench --config 8888 -m GM_runtime_cf_interp_1 --loops 0
Before: 24/24 MB 1 18.4ms 18.5ms 18.5ms 18.6ms 0% █▆▅▅▅▁▅▅▅▅ 8888 GM_runtime_cf_interp_1
After: 23/23 MB 1 16.6ms 16.6ms 16.6ms 16.7ms 0% ▁▁▃█▅▂▁▁█▅ 8888 GM_runtime_cf_interp_1
While byte-aligning things I noticed the write16 and write32 calls could
write all their bytes at once, with one call to resize() instead of 2-4
calls to push_back().
Looking at that disassembly, I noticed vector_instruction can be static.
Change-Id: I22985b49d6745797da10bbd6b6f2002a7618f2ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/214338
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index 5c877a7..91a2d70 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -161,40 +161,27 @@
}
}
-void ByteCodeGenerator::align(int divisor, int remainder) {
- switch (remainder - (int) fCode->size() % divisor) {
- case 0: return;
- case 3: this->write(ByteCodeInstruction::kNop3); // fall through
- case 2: this->write(ByteCodeInstruction::kNop2); // fall through
- case 1: this->write(ByteCodeInstruction::kNop1);
- break;
- default: SkASSERT(false);
- }
-}
-
void ByteCodeGenerator::write8(uint8_t b) {
fCode->push_back(b);
}
void ByteCodeGenerator::write16(uint16_t i) {
- SkASSERT(fCode->size() % 2 == 0);
- this->write8(i >> 0);
- this->write8(i >> 8);
+ size_t n = fCode->size();
+ fCode->resize(n+2);
+ memcpy(fCode->data() + n, &i, 2);
}
void ByteCodeGenerator::write32(uint32_t i) {
- SkASSERT(fCode->size() % 4 == 0);
- this->write8((i >> 0) & 0xFF);
- this->write8((i >> 8) & 0xFF);
- this->write8((i >> 16) & 0xFF);
- this->write8((i >> 24) & 0xFF);
+ size_t n = fCode->size();
+ fCode->resize(n+4);
+ memcpy(fCode->data() + n, &i, 4);
}
void ByteCodeGenerator::write(ByteCodeInstruction i) {
this->write8((uint8_t) i);
}
-ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
+static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
return ((ByteCodeInstruction) ((int) base + count - 1));
}
@@ -323,7 +310,6 @@
}
void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
- this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(b.fValue ? 1 : 0);
}
@@ -378,7 +364,6 @@
}
void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) {
- this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(Interpreter::Value((float) f.fValue).fUnsigned);
}
@@ -397,7 +382,6 @@
}
void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) {
- this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(i.fValue);
}
@@ -414,7 +398,6 @@
SkASSERT(slot_count(p.fOperand->fType) == 1);
std::unique_ptr<LValue> lvalue = this->getLValue(*p.fOperand);
lvalue->load();
- this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(type_category(p.fType) == TypeCategory::kFloat
? Interpreter::Value(1.0f).fUnsigned : 1);
@@ -518,11 +501,9 @@
void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) {
this->writeExpression(*t.fTest);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation trueLocation(this);
this->writeExpression(*t.fIfFalse);
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
DeferredLocation endLocation(this);
trueLocation.set();
@@ -720,13 +701,11 @@
}
void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fBreakTargets.top().emplace_back(this);
}
void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fContinueTargets.top().emplace_back(this);
}
@@ -738,7 +717,6 @@
this->writeStatement(*d.fStatement);
this->setContinueTargets();
this->writeExpression(*d.fTest);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
this->write16(start);
this->setBreakTargets();
@@ -754,7 +732,6 @@
if (f.fTest) {
this->writeExpression(*f.fTest);
this->write(ByteCodeInstruction::kNot);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*f.fStatement);
@@ -763,7 +740,6 @@
this->writeExpression(*f.fNext);
this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
}
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();
@@ -774,7 +750,6 @@
this->writeExpression(*f.fNext);
this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
}
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
}
@@ -785,11 +760,9 @@
if (i.fIfFalse) {
// if (test) { ..ifTrue.. } else { .. ifFalse .. }
this->writeExpression(*i.fTest);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation trueLocation(this);
this->writeStatement(*i.fIfFalse);
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
DeferredLocation endLocation(this);
trueLocation.set();
@@ -799,7 +772,6 @@
// if (test) { ..ifTrue.. }
this->writeExpression(*i.fTest);
this->write(ByteCodeInstruction::kNot);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*i.fIfTrue);
@@ -839,12 +811,10 @@
size_t start = fCode->size();
this->writeExpression(*w.fTest);
this->write(ByteCodeInstruction::kNot);
- this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*w.fStatement);
this->setContinueTargets();
- this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();