byte align everything in SkSLInterpreter

It's nicer to write code without having to think about alignment,
and this appears to be faster too:

    $ ninja -C out nanobench && out/nanobench --config 8888 -m GM_runtime_cf_interp_1 --loops 0
    Before:  24/24  MB	1	18.4ms	18.5ms	18.5ms	18.6ms	0%	█▆▅▅▅▁▅▅▅▅	8888	GM_runtime_cf_interp_1
    After:   23/23  MB	1	16.6ms	16.6ms	16.6ms	16.7ms	0%	▁▁▃█▅▂▁▁█▅	8888	GM_runtime_cf_interp_1

While byte-aligning things I noticed the write16 and write32 calls could
write all their bytes at once, with one call to resize() instead of 2-4
calls to push_back().

Looking at that disassembly, I noticed vector_instruction can be static.

Change-Id: I22985b49d6745797da10bbd6b6f2002a7618f2ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/214338
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index 5c877a7..91a2d70 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -161,40 +161,27 @@
     }
 }
 
-void ByteCodeGenerator::align(int divisor, int remainder) {
-    switch (remainder - (int) fCode->size() % divisor) {
-        case 0: return;
-        case 3: this->write(ByteCodeInstruction::kNop3); // fall through
-        case 2: this->write(ByteCodeInstruction::kNop2); // fall through
-        case 1: this->write(ByteCodeInstruction::kNop1);
-                break;
-        default: SkASSERT(false);
-    }
-}
-
 void ByteCodeGenerator::write8(uint8_t b) {
     fCode->push_back(b);
 }
 
 void ByteCodeGenerator::write16(uint16_t i) {
-    SkASSERT(fCode->size() % 2 == 0);
-    this->write8(i >> 0);
-    this->write8(i >> 8);
+    size_t n = fCode->size();
+    fCode->resize(n+2);
+    memcpy(fCode->data() + n, &i, 2);
 }
 
 void ByteCodeGenerator::write32(uint32_t i) {
-    SkASSERT(fCode->size() % 4 == 0);
-    this->write8((i >>  0) & 0xFF);
-    this->write8((i >>  8) & 0xFF);
-    this->write8((i >> 16) & 0xFF);
-    this->write8((i >> 24) & 0xFF);
+    size_t n = fCode->size();
+    fCode->resize(n+4);
+    memcpy(fCode->data() + n, &i, 4);
 }
 
 void ByteCodeGenerator::write(ByteCodeInstruction i) {
     this->write8((uint8_t) i);
 }
 
-ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
+static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
     return ((ByteCodeInstruction) ((int) base + count - 1));
 }
 
@@ -323,7 +310,6 @@
 }
 
 void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
-    this->align(4, 3);
     this->write(ByteCodeInstruction::kPushImmediate);
     this->write32(b.fValue ? 1 : 0);
 }
@@ -378,7 +364,6 @@
 }
 
 void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) {
-    this->align(4, 3);
     this->write(ByteCodeInstruction::kPushImmediate);
     this->write32(Interpreter::Value((float) f.fValue).fUnsigned);
 }
@@ -397,7 +382,6 @@
 }
 
 void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) {
-    this->align(4, 3);
     this->write(ByteCodeInstruction::kPushImmediate);
     this->write32(i.fValue);
 }
@@ -414,7 +398,6 @@
             SkASSERT(slot_count(p.fOperand->fType) == 1);
             std::unique_ptr<LValue> lvalue = this->getLValue(*p.fOperand);
             lvalue->load();
-            this->align(4, 3);
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(type_category(p.fType) == TypeCategory::kFloat
                             ? Interpreter::Value(1.0f).fUnsigned : 1);
@@ -518,11 +501,9 @@
 
 void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) {
     this->writeExpression(*t.fTest);
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     DeferredLocation trueLocation(this);
     this->writeExpression(*t.fIfFalse);
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     DeferredLocation endLocation(this);
     trueLocation.set();
@@ -720,13 +701,11 @@
 }
 
 void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     fBreakTargets.top().emplace_back(this);
 }
 
 void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     fContinueTargets.top().emplace_back(this);
 }
@@ -738,7 +717,6 @@
     this->writeStatement(*d.fStatement);
     this->setContinueTargets();
     this->writeExpression(*d.fTest);
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     this->write16(start);
     this->setBreakTargets();
@@ -754,7 +732,6 @@
     if (f.fTest) {
         this->writeExpression(*f.fTest);
         this->write(ByteCodeInstruction::kNot);
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kConditionalBranch);
         DeferredLocation endLocation(this);
         this->writeStatement(*f.fStatement);
@@ -763,7 +740,6 @@
             this->writeExpression(*f.fNext);
             this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
         }
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kBranch);
         this->write16(start);
         endLocation.set();
@@ -774,7 +750,6 @@
             this->writeExpression(*f.fNext);
             this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
         }
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kBranch);
         this->write16(start);
     }
@@ -785,11 +760,9 @@
     if (i.fIfFalse) {
         // if (test) { ..ifTrue.. } else { .. ifFalse .. }
         this->writeExpression(*i.fTest);
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kConditionalBranch);
         DeferredLocation trueLocation(this);
         this->writeStatement(*i.fIfFalse);
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kBranch);
         DeferredLocation endLocation(this);
         trueLocation.set();
@@ -799,7 +772,6 @@
         // if (test) { ..ifTrue.. }
         this->writeExpression(*i.fTest);
         this->write(ByteCodeInstruction::kNot);
-        this->align(2, 1);
         this->write(ByteCodeInstruction::kConditionalBranch);
         DeferredLocation endLocation(this);
         this->writeStatement(*i.fIfTrue);
@@ -839,12 +811,10 @@
     size_t start = fCode->size();
     this->writeExpression(*w.fTest);
     this->write(ByteCodeInstruction::kNot);
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     DeferredLocation endLocation(this);
     this->writeStatement(*w.fStatement);
     this->setContinueTargets();
-    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     this->write16(start);
     endLocation.set();