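SkSL interpreter performance improvements

Emit 16- and 32-bit bytecode operands in little-endian order at aligned
offsets (padding with a new kNop instruction) so the interpreter can read
each operand with a single load, and fold Interpreter::next() into the
run() loop. Interpreter::nextVector() is renamed to vectorOp().
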
Bug: skia:
Change-Id: Ib7aff4a5b159d8ec3b4b8bb96cee62ed5d277a04
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/211641
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h
index 92a2cca..327b20e 100644
--- a/src/sksl/SkSLByteCode.h
+++ b/src/sksl/SkSLByteCode.h
@@ -14,6 +14,7 @@
 
 enum class ByteCodeInstruction : uint8_t {
     kInvalid,
+    kNop,
     // B = bool, F = float, I = int, S = signed, U = unsigned
     kAddF,
     kAddI,
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index f918ef6..5bf18aa 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -145,20 +145,28 @@
     }
 }
 
+void ByteCodeGenerator::align(int divisor, int remainder) {  // emits kNop padding (see loop)
+    while ((int) fCode->size() % divisor != remainder) {  // until size % divisor == remainder
+        this->write(ByteCodeInstruction::kNop);  // callers pass (4, 3) or (2, 1) before an opcode
+    }
+}
+
 void ByteCodeGenerator::write8(uint8_t b) {
     fCode->push_back(b);
 }
 
 void ByteCodeGenerator::write16(uint16_t i) {
+    SkASSERT(fCode->size() % 2 == 0);  // operand must begin at an even offset into fCode
+    this->write8(i >> 0);  // low byte first, then the high byte: little-endian order
     this->write8(i >> 8);
-    this->write8(i);
 }
 
 void ByteCodeGenerator::write32(uint32_t i) {
-    this->write8((i >> 24) & 0xFF);
-    this->write8((i >> 16) & 0xFF);
-    this->write8((i >>  8) & 0xFF);
+    SkASSERT(fCode->size() % 4 == 0);  // operand must begin at a multiple-of-4 offset into fCode
     this->write8((i >>  0) & 0xFF);
+    this->write8((i >>  8) & 0xFF);  // bytes go out least-significant first (little-endian)
+    this->write8((i >> 16) & 0xFF);
+    this->write8((i >> 24) & 0xFF);  // most-significant byte is written last
 }
 
 void ByteCodeGenerator::write(ByteCodeInstruction i) {
@@ -286,6 +294,7 @@
 }
 
 void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
+    this->align(4, 3);  // pad so write32 starts 4-aligned; NOTE(review): b is unused, 1 always pushed
     this->write(ByteCodeInstruction::kPushImmediate);
     this->write32(1);
 }
@@ -333,6 +342,7 @@
 }
 
 void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) {
+    this->align(4, 3);
     this->write(ByteCodeInstruction::kPushImmediate);
     union { float f; uint32_t u; } pun = { (float) f.fValue };
     this->write32(pun.u);
@@ -349,6 +359,7 @@
 }
 
 void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) {
+    this->align(4, 3);  // pad so the 32-bit operand after the opcode byte starts 4-aligned
     this->write(ByteCodeInstruction::kPushImmediate);
     this->write32(i.fValue);
 }
@@ -364,6 +375,7 @@
         case Token::Kind::MINUSMINUS: {
             std::unique_ptr<LValue> lvalue = this->getLValue(*p.fOperand);
             lvalue->load();
+            this->align(4, 3);
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(1);
             if (p.fOperator == Token::Kind::PLUSPLUS) {
@@ -403,6 +415,7 @@
         case Expression::kVariableReference_Kind: {
             const Variable& var = ((VariableReference&) *s.fBase).fVariable;
             int location = this->getLocation(var);
+            this->align(4, 3);
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(location);
             this->write(ByteCodeInstruction::kLoadSwizzle);
@@ -430,6 +443,7 @@
         SkASSERT(location <= 255);
         this->write8(location);
     } else {
+        this->align(4, 3);
         this->write(ByteCodeInstruction::kPushImmediate);
         this->write32(this->getLocation(v.fVariable));
         int count = slot_count(v.fType);
@@ -499,6 +513,7 @@
 void ByteCodeGenerator::writeTarget(const Expression& e) {
     switch (e.fKind) {
         case Expression::kVariableReference_Kind:
+            this->align(4, 3);
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(this->getLocation(((VariableReference&) e).fVariable));
             break;
@@ -547,6 +562,7 @@
         : INHERITED(*generator)
         , fCount(slot_count(var.fType))
         , fIsGlobal(var.fStorage == Variable::kGlobal_Storage) {
+        fGenerator.align(4, 3);
         fGenerator.write(ByteCodeInstruction::kPushImmediate);
         fGenerator.write32(generator->getLocation(var));
     }
@@ -617,11 +633,13 @@
 }
 
 void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
+    this->align(2, 1);  // pad so the offset after the opcode byte is even (presumably the target slot)
     this->write(ByteCodeInstruction::kBranch);
     fBreakTargets.top().emplace_back(this);
 }
 
 void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
+    this->align(2, 1);  // pad so the offset after the opcode byte is even (presumably the target slot)
     this->write(ByteCodeInstruction::kBranch);
     fContinueTargets.top().emplace_back(this);
 }
@@ -633,6 +651,7 @@
     this->writeStatement(*d.fStatement);
     this->setContinueTargets();
     this->writeExpression(*d.fTest);
+    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     this->write16(start);
     this->setBreakTargets();
@@ -648,6 +667,7 @@
     if (f.fTest) {
         this->writeExpression(*f.fTest);
         this->write(ByteCodeInstruction::kNot);
+        this->align(2, 1);
         this->write(ByteCodeInstruction::kConditionalBranch);
         DeferredLocation endLocation(this);
         this->writeStatement(*f.fStatement);
@@ -657,6 +677,7 @@
             this->write(ByteCodeInstruction::kPop);
             this->write8(slot_count(f.fNext->fType));
         }
+        this->align(2, 1);
         this->write(ByteCodeInstruction::kBranch);
         this->write16(start);
         endLocation.set();
@@ -668,6 +689,7 @@
             this->write(ByteCodeInstruction::kPop);
             this->write8(slot_count(f.fNext->fType));
         }
+        this->align(2, 1);
         this->write(ByteCodeInstruction::kBranch);
         this->write16(start);
     }
@@ -677,9 +699,11 @@
 void ByteCodeGenerator::writeIfStatement(const IfStatement& i) {
     this->writeExpression(*i.fTest);
     this->write(ByteCodeInstruction::kNot);
+    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     DeferredLocation elseLocation(this);
     this->writeStatement(*i.fIfTrue);
+    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     DeferredLocation endLocation(this);
     elseLocation.set();
@@ -707,6 +731,7 @@
         // has been allocated
         int location = getLocation(*decl.fVar);
         if (decl.fValue) {
+            this->align(4, 3);
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(location);
             this->writeExpression(*decl.fValue);
@@ -726,10 +751,12 @@
     size_t start = fCode->size();
     this->writeExpression(*w.fTest);
     this->write(ByteCodeInstruction::kNot);
+    this->align(2, 1);
     this->write(ByteCodeInstruction::kConditionalBranch);
     DeferredLocation endLocation(this);
     this->writeStatement(*w.fStatement);
     this->setContinueTargets();
+    this->align(2, 1);
     this->write(ByteCodeInstruction::kBranch);
     this->write16(start);
     endLocation.set();
diff --git a/src/sksl/SkSLByteCodeGenerator.h b/src/sksl/SkSLByteCodeGenerator.h
index d89d578..497d462 100644
--- a/src/sksl/SkSLByteCodeGenerator.h
+++ b/src/sksl/SkSLByteCodeGenerator.h
@@ -83,6 +83,8 @@
 
     bool generateCode() override;
 
+    void align(int divisor, int remainder);
+
     void write8(uint8_t b);
 
     void write16(uint16_t b);
@@ -122,8 +124,8 @@
         void set() {
             int target = fGenerator.fCode->size();
             SkASSERT(target <= 65535);
-            (*fGenerator.fCode)[fOffset] = target >> 8;
-            (*fGenerator.fCode)[fOffset + 1] = target;
+            (*fGenerator.fCode)[fOffset] = target;
+            (*fGenerator.fCode)[fOffset + 1] = target >> 8;
 #ifdef SK_DEBUG
             fSet = true;
 #endif
diff --git a/src/sksl/SkSLInterpreter.cpp b/src/sksl/SkSLInterpreter.cpp
index 6b17e59..d4575b3 100644
--- a/src/sksl/SkSLInterpreter.cpp
+++ b/src/sksl/SkSLInterpreter.cpp
@@ -75,17 +75,15 @@
 }
 
 uint16_t Interpreter::read16() {
-    uint16_t result = (fCurrentFunction->fCode[fIP ] << 8) +
-                       fCurrentFunction->fCode[fIP + 1];
+    SkASSERT(fIP % 2 == 0);  // the operand is expected at an even offset
+    uint16_t result = *(uint16_t*) &fCurrentFunction->fCode[fIP];  // NOTE(review): native-endian load via pointer cast; confirm it matches the generator's byte order on all targets
     fIP += 2;
     return result;
 }
 
 uint32_t Interpreter::read32() {
-    uint32_t result = (fCurrentFunction->fCode[fIP]     << 24) +
-                      (fCurrentFunction->fCode[fIP + 1] << 16) +
-                      (fCurrentFunction->fCode[fIP + 2] <<  8) +
-                       fCurrentFunction->fCode[fIP + 3];
+    SkASSERT(fIP % 4 == 0);  // the operand is expected at a multiple-of-4 offset
+    uint32_t result = *(uint32_t*) &fCurrentFunction->fCode[fIP];  // NOTE(review): native-endian load via pointer cast; confirm it matches the generator's byte order on all targets
     fIP += 4;
     return result;
 }
@@ -215,180 +213,6 @@
         break;                           \
     }
 
-void Interpreter::next() {
-#ifdef TRACE
-    printf("at %d\n", fIP);
-#endif
-    ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();
-    switch (inst) {
-        BINARY_OP(kAddI, int32_t, fSigned, +)
-        BINARY_OP(kAddF, float, fFloat, +)
-        case ByteCodeInstruction::kBranch:
-            fIP = this->read16();
-            break;
-        BINARY_OP(kCompareIEQ, int32_t, fSigned, ==)
-        BINARY_OP(kCompareFEQ, float, fFloat, ==)
-        BINARY_OP(kCompareINEQ, int32_t, fSigned, !=)
-        BINARY_OP(kCompareFNEQ, float, fFloat, !=)
-        BINARY_OP(kCompareSGT, int32_t, fSigned, >)
-        BINARY_OP(kCompareUGT, uint32_t, fUnsigned, >)
-        BINARY_OP(kCompareFGT, float, fFloat, >)
-        BINARY_OP(kCompareSGTEQ, int32_t, fSigned, >=)
-        BINARY_OP(kCompareUGTEQ, uint32_t, fUnsigned, >=)
-        BINARY_OP(kCompareFGTEQ, float, fFloat, >=)
-        BINARY_OP(kCompareSLT, int32_t, fSigned, <)
-        BINARY_OP(kCompareULT, uint32_t, fUnsigned, <)
-        BINARY_OP(kCompareFLT, float, fFloat, <)
-        BINARY_OP(kCompareSLTEQ, int32_t, fSigned, <=)
-        BINARY_OP(kCompareULTEQ, uint32_t, fUnsigned, <=)
-        BINARY_OP(kCompareFLTEQ, float, fFloat, <=)
-        case ByteCodeInstruction::kConditionalBranch: {
-            int target = this->read16();
-            if (this->pop().fBool) {
-                fIP = target;
-            }
-            break;
-        }
-        case ByteCodeInstruction::kDebugPrint: {
-            Value v = this->pop();
-            printf("Debug: %d(int), %d(uint), %f(float)\n", v.fSigned, v.fUnsigned, v.fFloat);
-            break;
-        }
-        BINARY_OP(kDivideS, int32_t, fSigned, /)
-        BINARY_OP(kDivideU, uint32_t, fUnsigned, /)
-        BINARY_OP(kDivideF, float, fFloat, /)
-        case ByteCodeInstruction::kDup:
-            this->push(fStack.back());
-            break;
-        case ByteCodeInstruction::kDupDown: {
-            int count = this->read8();
-            for (int i = 0; i < count; ++i) {
-                fStack.insert(fStack.end() - i - count - 1, fStack[fStack.size() - i - 1]);
-            }
-            break;
-        }
-        case ByteCodeInstruction::kFloatToInt: {
-            Value& top = fStack.back();
-            top.fSigned = (int) top.fFloat;
-            break;
-        }
-        case ByteCodeInstruction::kSignedToFloat: {
-            Value& top = fStack.back();
-            top.fFloat = (float) top.fSigned;
-            break;
-        }
-        case ByteCodeInstruction::kUnsignedToFloat: {
-            Value& top = fStack.back();
-            top.fFloat = (float) top.fUnsigned;
-            break;
-        }
-        case ByteCodeInstruction::kLoad: {
-            int target = this->pop().fSigned;
-            SkASSERT(target < (int) fStack.size());
-            this->push(fStack[target]);
-            break;
-        }
-        case ByteCodeInstruction::kLoadGlobal: {
-            int target = this->read8();
-            SkASSERT(target < (int) fGlobals.size());
-            this->push(fGlobals[target]);
-            break;
-        }
-        case ByteCodeInstruction::kLoadSwizzle: {
-            Value target = this->pop();
-            int count = read8();
-            for (int i = 0; i < count; ++i) {
-                SkASSERT(target.fSigned + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
-                this->push(fStack[target.fSigned + fCurrentFunction->fCode[fIP + i]]);
-            }
-            fIP += count;
-            break;
-        }
-        BINARY_OP(kMultiplyS, int32_t, fSigned, *)
-        BINARY_OP(kMultiplyU, uint32_t, fUnsigned, *)
-        BINARY_OP(kMultiplyF, float, fFloat, *)
-        case ByteCodeInstruction::kNot: {
-            Value& top = fStack.back();
-            top.fBool = !top.fBool;
-            break;
-        }
-        case ByteCodeInstruction::kNegateF: {
-            Value& top = fStack.back();
-            top.fFloat = -top.fFloat;
-            break;
-        }
-        case ByteCodeInstruction::kNegateS: {
-            Value& top = fStack.back();
-            top.fSigned = -top.fSigned;
-            break;
-        }
-        case ByteCodeInstruction::kPop:
-            for (int i = read8(); i > 0; --i) {
-                this->pop();
-            }
-            break;
-        case ByteCodeInstruction::kPushImmediate:
-            this->push(Value((int) read32()));
-            break;
-        BINARY_OP(kRemainderS, int32_t, fSigned, %)
-        BINARY_OP(kRemainderU, uint32_t, fUnsigned, %)
-        case ByteCodeInstruction::kReturn: {
-            int count = this->read8();
-            for (int i = 0; i < count; ++i) {
-                fStack[i] = fStack[fStack.size() - count + i];
-            }
-            fIP = (int) fCurrentFunction->fCode.size();
-            break;
-        }
-        case ByteCodeInstruction::kStore: {
-            Value value = this->pop();
-            int target = this->pop().fSigned;
-            SkASSERT(target < (int) fStack.size());
-            fStack[target] = value;
-            break;
-        }
-        case ByteCodeInstruction::kStoreGlobal: {
-            Value value = this->pop();
-            int target = this->pop().fSigned;
-            SkASSERT(target < (int) fGlobals.size());
-            fGlobals[target] = value;
-            break;
-        }
-        case ByteCodeInstruction::kStoreSwizzle: {
-            int count = read8();
-            int target = fStack[fStack.size() - count - 1].fSigned;
-            for (int i = count - 1; i >= 0; --i) {
-                SkASSERT(target + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
-                fStack[target + fCurrentFunction->fCode[fIP + i]] = this->pop();
-            }
-            this->pop();
-            fIP += count;
-            break;
-        }
-        BINARY_OP(kSubtractI, int32_t, fSigned, -)
-        BINARY_OP(kSubtractF, float, fFloat, -)
-        case ByteCodeInstruction::kSwizzle: {
-            Value vec[4];
-            for (int i = this->read8() - 1; i >= 0; --i) {
-                vec[i] = this->pop();
-            }
-            for (int i = this->read8() - 1; i >= 0; --i) {
-                this->push(vec[this->read8()]);
-            }
-            break;
-        }
-        case ByteCodeInstruction::kVector:
-            this->nextVector(this->read8());
-            break;
-        default:
-            printf("unsupported instruction %d\n", (int) inst);
-            SkASSERT(false);
-    }
-#ifdef TRACE
-    this->dumpStack();
-#endif
-}
-
 static constexpr int VECTOR_MAX = 16;
 
 #define VECTOR_BINARY_OP(inst, type, field, op)               \
@@ -406,7 +230,7 @@
         break;                                                \
     }
 
-void Interpreter::nextVector(int count) {
+void Interpreter::vectorOp(int count) {
     ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();
     switch (inst) {
         VECTOR_BINARY_OP(kAddI, int32_t, fSigned, +)
@@ -490,8 +314,6 @@
         }
         VECTOR_BINARY_OP(kSubtractI, int32_t, fSigned, -)
         VECTOR_BINARY_OP(kSubtractF, float, fFloat, -)
-        case ByteCodeInstruction::kVector:
-            this->nextVector(this->read8());
         default:
             printf("unsupported instruction %d\n", (int) inst);
             SkASSERT(false);
@@ -500,7 +322,179 @@
 
 void Interpreter::run() {
     while (fIP < (int) fCurrentFunction->fCode.size()) {
-        next();
+#ifdef TRACE
+        printf("at %d\n", fIP);
+#endif
+        ByteCodeInstruction inst = (ByteCodeInstruction) this->read8();  // fetch the next opcode
+        switch (inst) {  // one case per instruction; operands are read from the code as needed
+            BINARY_OP(kAddI, int32_t, fSigned, +)
+            BINARY_OP(kAddF, float, fFloat, +)
+            case ByteCodeInstruction::kBranch:
+                fIP = this->read16();  // the 16-bit operand becomes the new fIP
+                break;
+            BINARY_OP(kCompareIEQ, int32_t, fSigned, ==)
+            BINARY_OP(kCompareFEQ, float, fFloat, ==)
+            BINARY_OP(kCompareINEQ, int32_t, fSigned, !=)
+            BINARY_OP(kCompareFNEQ, float, fFloat, !=)
+            BINARY_OP(kCompareSGT, int32_t, fSigned, >)
+            BINARY_OP(kCompareUGT, uint32_t, fUnsigned, >)
+            BINARY_OP(kCompareFGT, float, fFloat, >)
+            BINARY_OP(kCompareSGTEQ, int32_t, fSigned, >=)
+            BINARY_OP(kCompareUGTEQ, uint32_t, fUnsigned, >=)
+            BINARY_OP(kCompareFGTEQ, float, fFloat, >=)
+            BINARY_OP(kCompareSLT, int32_t, fSigned, <)
+            BINARY_OP(kCompareULT, uint32_t, fUnsigned, <)
+            BINARY_OP(kCompareFLT, float, fFloat, <)
+            BINARY_OP(kCompareSLTEQ, int32_t, fSigned, <=)
+            BINARY_OP(kCompareULTEQ, uint32_t, fUnsigned, <=)
+            BINARY_OP(kCompareFLTEQ, float, fFloat, <=)
+            case ByteCodeInstruction::kConditionalBranch: {
+                int target = this->read16();  // operand is consumed whether or not we jump
+                if (this->pop().fBool) {  // jump only when the popped value is true
+                    fIP = target;
+                }
+                break;
+            }
+            case ByteCodeInstruction::kDebugPrint: {
+                Value v = this->pop();  // NOTE(review): fUnsigned is printed with %d below; confirm
+                printf("Debug: %d(int), %d(uint), %f(float)\n", v.fSigned, v.fUnsigned, v.fFloat);
+                break;
+            }
+            BINARY_OP(kDivideS, int32_t, fSigned, /)
+            BINARY_OP(kDivideU, uint32_t, fUnsigned, /)
+            BINARY_OP(kDivideF, float, fFloat, /)
+            case ByteCodeInstruction::kDup:
+                this->push(fStack.back());  // push a copy of the top of the stack
+                break;
+            case ByteCodeInstruction::kDupDown: {
+                int count = this->read8();
+                for (int i = 0; i < count; ++i) {
+                    fStack.insert(fStack.end() - i - count - 1, fStack[fStack.size() - i - 1]);
+                }
+                break;
+            }
+            case ByteCodeInstruction::kFloatToInt: {
+                Value& top = fStack.back();  // converted in place on the top of the stack
+                top.fSigned = (int) top.fFloat;
+                break;
+            }
+            case ByteCodeInstruction::kSignedToFloat: {
+                Value& top = fStack.back();
+                top.fFloat = (float) top.fSigned;
+                break;
+            }
+            case ByteCodeInstruction::kUnsignedToFloat: {
+                Value& top = fStack.back();
+                top.fFloat = (float) top.fUnsigned;
+                break;
+            }
+            case ByteCodeInstruction::kLoad: {
+                int target = this->pop().fSigned;  // popped value is an index into fStack
+                SkASSERT(target < (int) fStack.size());
+                this->push(fStack[target]);
+                break;
+            }
+            case ByteCodeInstruction::kLoadGlobal: {
+                int target = this->read8();  // operand byte is an index into fGlobals
+                SkASSERT(target < (int) fGlobals.size());
+                this->push(fGlobals[target]);
+                break;
+            }
+            case ByteCodeInstruction::kLoadSwizzle: {
+                Value target = this->pop();  // base index into fStack
+                int count = read8();  // number of one-byte component offsets that follow in the code
+                for (int i = 0; i < count; ++i) {
+                    SkASSERT(target.fSigned + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
+                    this->push(fStack[target.fSigned + fCurrentFunction->fCode[fIP + i]]);
+                }
+                fIP += count;  // skip the offset bytes that were read directly above
+                break;
+            }
+            BINARY_OP(kMultiplyS, int32_t, fSigned, *)
+            BINARY_OP(kMultiplyU, uint32_t, fUnsigned, *)
+            BINARY_OP(kMultiplyF, float, fFloat, *)
+            case ByteCodeInstruction::kNot: {
+                Value& top = fStack.back();
+                top.fBool = !top.fBool;
+                break;
+            }
+            case ByteCodeInstruction::kNegateF: {
+                Value& top = fStack.back();
+                top.fFloat = -top.fFloat;
+                break;
+            }
+            case ByteCodeInstruction::kNegateS: {
+                Value& top = fStack.back();
+                top.fSigned = -top.fSigned;
+                break;
+            }
+            case ByteCodeInstruction::kNop:  // padding byte; has no effect on the stack
+                break;
+            case ByteCodeInstruction::kPop:
+                for (int i = read8(); i > 0; --i) {  // operand byte is the number of values to pop
+                    this->pop();
+                }
+                break;
+            case ByteCodeInstruction::kPushImmediate:
+                this->push(Value((int) read32()));  // push the 32-bit operand as a Value
+                break;
+            BINARY_OP(kRemainderS, int32_t, fSigned, %)
+            BINARY_OP(kRemainderU, uint32_t, fUnsigned, %)
+            case ByteCodeInstruction::kReturn: {
+                int count = this->read8();
+                for (int i = 0; i < count; ++i) {
+                    fStack[i] = fStack[fStack.size() - count + i];  // copy the top count values to the bottom
+                }
+                fIP = (int) fCurrentFunction->fCode.size();  // makes the enclosing while condition false
+                break;
+            }
+            case ByteCodeInstruction::kStore: {
+                Value value = this->pop();  // value is popped first, then the target index
+                int target = this->pop().fSigned;
+                SkASSERT(target < (int) fStack.size());
+                fStack[target] = value;
+                break;
+            }
+            case ByteCodeInstruction::kStoreGlobal: {
+                Value value = this->pop();
+                int target = this->pop().fSigned;
+                SkASSERT(target < (int) fGlobals.size());
+                fGlobals[target] = value;
+                break;
+            }
+            case ByteCodeInstruction::kStoreSwizzle: {
+                int count = read8();
+                int target = fStack[fStack.size() - count - 1].fSigned;  // base index sits below the count values
+                for (int i = count - 1; i >= 0; --i) {  // values are popped in reverse order
+                    SkASSERT(target + fCurrentFunction->fCode[fIP + i] < (int) fStack.size());
+                    fStack[target + fCurrentFunction->fCode[fIP + i]] = this->pop();
+                }
+                this->pop();  // discard the base index
+                fIP += count;
+                break;
+            }
+            BINARY_OP(kSubtractI, int32_t, fSigned, -)
+            BINARY_OP(kSubtractF, float, fFloat, -)
+            case ByteCodeInstruction::kSwizzle: {
+                Value vec[4];  // NOTE(review): assumes the first operand byte is at most 4; confirm
+                for (int i = this->read8() - 1; i >= 0; --i) {
+                    vec[i] = this->pop();
+                }
+                for (int i = this->read8() - 1; i >= 0; --i) {
+                    this->push(vec[this->read8()]);  // each following byte selects a component of vec
+                }
+                break;
+            }
+            case ByteCodeInstruction::kVector:
+                this->vectorOp(this->read8());  // operand byte is passed as vectorOp's count
+                break;
+            default:
+                printf("unsupported instruction %d\n", (int) inst);
+                SkASSERT(false);
+        }
+#ifdef TRACE
+        this->dumpStack();
+#endif
     }
 }
 
diff --git a/src/sksl/SkSLInterpreter.h b/src/sksl/SkSLInterpreter.h
index b91be2a..5324fd9 100644
--- a/src/sksl/SkSLInterpreter.h
+++ b/src/sksl/SkSLInterpreter.h
@@ -70,9 +70,7 @@
 
     uint32_t read32();
 
-    void next();
-
-    void nextVector(int count);
+    inline void vectorOp(int count);
 
     void run();