Remove threaded code

Change-Id: I8bd4ba328b024364972ddafff70fb31ec5a79001
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/282621
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp
index 90c6671..99f0845 100644
--- a/src/sksl/SkSLByteCode.cpp
+++ b/src/sksl/SkSLByteCode.cpp
@@ -31,16 +31,10 @@
 #define READ8() (*(ip++))
 #define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
 #define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
-#define READ_INST() (ip += sizeof(instruction), \
-                     sk_unaligned_load<instruction>(ip - sizeof(instruction)))
+#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
+                     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
 
-#define VECTOR_DISASSEMBLE(op, text)                                \
-    case ByteCodeInstruction::op: printf(text); ++ip; break;        \
-    case ByteCodeInstruction::op##2: printf(text "2"); ++ip; break; \
-    case ByteCodeInstruction::op##3: printf(text "3"); ++ip; break; \
-    case ByteCodeInstruction::op##4: printf(text "4"); ++ip; break;
-
-#define VECTOR_DISASSEMBLE_NO_COUNT(op, text)                 \
+#define VECTOR_DISASSEMBLE(op, text)                          \
     case ByteCodeInstruction::op: printf(text); break;        \
     case ByteCodeInstruction::op##2: printf(text "2"); break; \
     case ByteCodeInstruction::op##3: printf(text "3"); break; \
@@ -50,12 +44,8 @@
     VECTOR_DISASSEMBLE(op, text)            \
     case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
 
-#define VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(op, text) \
-    VECTOR_DISASSEMBLE_NO_COUNT(op, text)            \
-    case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
-
 static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
-    auto inst = (ByteCodeInstruction) (intptr_t) READ_INST();
+    auto inst = READ_INST();
     printf("%04x ", (int)inst);
     switch (inst) {
         VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
@@ -87,9 +77,9 @@
         VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
         VECTOR_DISASSEMBLE(kCompareULT, "compareult")
         VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
-        VECTOR_DISASSEMBLE_NO_COUNT(kConvertFtoI, "convertftoi")
-        VECTOR_DISASSEMBLE_NO_COUNT(kConvertStoF, "convertstof")
-        VECTOR_DISASSEMBLE_NO_COUNT(kConvertUtoF, "convertutof")
+        VECTOR_DISASSEMBLE(kConvertFtoI, "convertftoi")
+        VECTOR_DISASSEMBLE(kConvertStoF, "convertstof")
+        VECTOR_DISASSEMBLE(kConvertUtoF, "convertutof")
         VECTOR_DISASSEMBLE(kCos, "cos")
         VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
         VECTOR_DISASSEMBLE(kDivideS, "divideS")
@@ -98,18 +88,18 @@
         case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
         case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
         case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
-        case ByteCodeInstruction::kLoad: printf("load %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoad2: printf("load2 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoad3: printf("load3 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoad4: printf("load4 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ16() >> 8); break;
+        case ByteCodeInstruction::kLoad: printf("load %d", READ8()); break;
+        case ByteCodeInstruction::kLoad2: printf("load2 %d", READ8()); break;
+        case ByteCodeInstruction::kLoad3: printf("load3 %d", READ8()); break;
+        case ByteCodeInstruction::kLoad4: printf("load4 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ8()); break;
+        case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ8()); break;
+        case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ8()); break;
+        case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ8()); break;
         case ByteCodeInstruction::kLoadSwizzle: {
             int target = READ8();
             int count = READ8();
@@ -159,21 +149,21 @@
         }
         VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
         VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
-        VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kNegateF, "negatef")
-        VECTOR_DISASSEMBLE_NO_COUNT(kNegateI, "negatei")
+        VECTOR_MATRIX_DISASSEMBLE(kNegateF, "negatef")
+        VECTOR_DISASSEMBLE(kNegateI, "negatei")
         case ByteCodeInstruction::kNotB: printf("notb"); break;
         case ByteCodeInstruction::kOrB: printf("orb"); break;
-        VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kPop, "pop")
+        VECTOR_MATRIX_DISASSEMBLE(kPop, "pop")
         case ByteCodeInstruction::kPushImmediate: {
             uint32_t v = READ32();
             union { uint32_t u; float f; } pun = { v };
             printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
             break;
         }
-        case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ16() >> 8); break;
+        case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ8()); break;
+        case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ8()); break;
+        case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ8()); break;
+        case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ8()); break;
         VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
         VECTOR_DISASSEMBLE(kRemainderS, "remainders")
         VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
@@ -189,7 +179,7 @@
         case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
         case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
         VECTOR_DISASSEMBLE(kSin, "sin")
-        VECTOR_DISASSEMBLE_NO_COUNT(kSqrt, "sqrt")
+        VECTOR_DISASSEMBLE(kSqrt, "sqrt")
         case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
         case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
         case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
@@ -247,10 +237,10 @@
             break;
         }
         VECTOR_DISASSEMBLE(kTan, "tan")
-        case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ16() >> 8); break;
-        case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ16() >> 8); break;
+        case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ8()); break;
+        case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ8()); break;
+        case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ8()); break;
+        case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ8()); break;
         case ByteCodeInstruction::kXorB: printf("xorb"); break;
         case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
         case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
@@ -266,160 +256,129 @@
         case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
         case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
         default:
-            ip -= sizeof(instruction);
+            ip -= sizeof(ByteCodeInstruction);
             printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
             SkASSERT(false);
     }
     return ip;
 }
 
-#ifdef SKSLC_THREADED_CODE
-    #define LABEL(name) name:
-    #ifdef TRACE
-        #define NEXT() goto next
-    #else
-        #define NEXT() goto *READ_INST()
-    #endif
-#else
-    #define LABEL(name) case ByteCodeInstruction::name:
-    #define NEXT() continue
-#endif
-
-#define VECTOR_BINARY_OP(base, field, op)             \
-    LABEL(base ## 4)                                  \
-        sp[-4] = sp[-4].field op sp[0].field;         \
-        POP();                                        \
-        /* fall through */                            \
-    LABEL(base ## 3) {                                \
-        sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
-        POP();                                        \
-    }   /* fall through */                            \
-    LABEL(base ## 2) {                                \
-        sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
-        POP();                                        \
-    }   /* fall through */                            \
-    LABEL(base) {                                     \
-        sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
-        POP();                                        \
-        ++ip;                                         \
-        NEXT();                                       \
+#define VECTOR_BINARY_OP(base, field, op)                             \
+    case ByteCodeInstruction::base ## 4:                              \
+        sp[-4] = sp[-4].field op sp[0].field;                         \
+        POP();                                                        \
+        /* fall through */                                            \
+    case ByteCodeInstruction::base ## 3: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = sp[count].field op sp[0].field;                   \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base ## 2: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = sp[count].field op sp[0].field;                   \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base: {                                 \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = sp[count].field op sp[0].field;                   \
+        POP();                                                        \
+        continue;                                                     \
     }
 
 // A naive implementation of / or % using skvx operations will likely crash with a divide by zero
-// in inactive vector lanesm, so we need to be sure to avoid masked-off lanes.
-#define VECTOR_BINARY_MASKED_OP(base, field, op)            \
-    LABEL(base ## 4)                                        \
-        for (int i = 0; i < VecWidth; ++i) {                \
-            if (mask()[i]) {                                \
-                sp[-4].field[i] op ## = sp[0].field[i];     \
-            }                                               \
-        }                                                   \
-        POP();                                              \
-        /* fall through */                                  \
-    LABEL(base ## 3) {                                      \
-        for (int i = 0; i < VecWidth; ++i) {                \
-            if (mask()[i]) {                                \
-                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
-            }                                               \
-        }                                                   \
-        POP();                                              \
-    }   /* fall through */                                  \
-    LABEL(base ## 2) {                                      \
-        for (int i = 0; i < VecWidth; ++i) {                \
-            if (mask()[i]) {                                \
-                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
-            }                                               \
-        }                                                   \
-        POP();                                              \
-    }   /* fall through */                                  \
-    LABEL(base) {                                           \
-        for (int i = 0; i < VecWidth; ++i) {                \
-            if (mask()[i]) {                                \
-                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
-            }                                               \
-        }                                                   \
-        POP();                                              \
-        ++ip;                                               \
-        NEXT();                                             \
+// in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
+#define VECTOR_BINARY_MASKED_OP(base, field, op)                      \
+    case ByteCodeInstruction::base ## 4:                              \
+        for (int i = 0; i < VecWidth; ++i) {                          \
+            if (mask()[i]) {                                          \
+                sp[-4].field[i] op ## = sp[0].field[i];               \
+            }                                                         \
+        }                                                             \
+        POP();                                                        \
+        /* fall through */                                            \
+    case ByteCodeInstruction::base ## 3: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        for (int i = 0; i < VecWidth; ++i) {                          \
+            if (mask()[i]) {                                          \
+                sp[count].field[i] op ## = sp[0].field[i];            \
+            }                                                         \
+        }                                                             \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base ## 2: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        for (int i = 0; i < VecWidth; ++i) {                          \
+            if (mask()[i]) {                                          \
+                sp[count].field[i] op ## = sp[0].field[i];            \
+            }                                                         \
+        }                                                             \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base: {                                 \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        for (int i = 0; i < VecWidth; ++i) {                          \
+            if (mask()[i]) {                                          \
+                sp[count].field[i] op ## = sp[0].field[i];            \
+            }                                                         \
+        }                                                             \
+        POP();                                                        \
+        continue;                                                     \
     }
 
 
 #define VECTOR_MATRIX_BINARY_OP(base, field, op)          \
     VECTOR_BINARY_OP(base, field, op)                     \
-    LABEL(base ## N) {                                    \
+    case ByteCodeInstruction::base ## N: {                \
         int count = READ8();                              \
         for (int i = count; i > 0; --i) {                 \
             sp[-count] = sp[-count].field op sp[0].field; \
             POP();                                        \
         }                                                 \
-        NEXT();                                           \
+        continue;                                         \
     }
 
-#define VECTOR_BINARY_FN(base, field, fn)               \
-    LABEL(base ## 4)                                    \
-        sp[-4] = fn(sp[-4].field, sp[0].field);         \
-        POP();                                          \
-        /* fall through */                              \
-    LABEL(base ## 3) {                                  \
-        sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
-        POP();                                          \
-    }   /* fall through */                              \
-    LABEL(base ## 2) {                                  \
-        sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
-        POP();                                          \
-    }   /* fall through */                              \
-    LABEL(base) {                                       \
-        sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
-        POP();                                          \
-        ++ip;                                           \
-        NEXT();                                         \
+#define VECTOR_BINARY_FN(base, field, fn)                             \
+    case ByteCodeInstruction::base ## 4:                              \
+        sp[-4] = fn(sp[-4].field, sp[0].field);                       \
+        POP();                                                        \
+        /* fall through */                                            \
+    case ByteCodeInstruction::base ## 3: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = fn(sp[count].field, sp[0].field);                 \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base ## 2: {                            \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = fn(sp[count].field, sp[0].field);                 \
+        POP();                                                        \
+    }   /* fall through */                                            \
+    case ByteCodeInstruction::base: {                                 \
+        int count = (int)inst - (int)(ByteCodeInstruction::base) - 1; \
+        sp[count] = fn(sp[count].field, sp[0].field);                 \
+        POP();                                                        \
+        continue;                                                     \
     }
 
-#define VECTOR_UNARY_FN(base, fn, field)         \
-    LABEL(base ## 4)  sp[-3] = fn(sp[-3].field); \
-    LABEL(base ## 3)  sp[-2] = fn(sp[-2].field); \
-    LABEL(base ## 2)  sp[-1] = fn(sp[-1].field); \
-    LABEL(base)       sp[ 0] = fn(sp[ 0].field); \
-                      NEXT();
+#define VECTOR_UNARY_FN(base, fn, field)                             \
+    case ByteCodeInstruction::base ## 4:  sp[-3] = fn(sp[-3].field); \
+    case ByteCodeInstruction::base ## 3:  sp[-2] = fn(sp[-2].field); \
+    case ByteCodeInstruction::base ## 2:  sp[-1] = fn(sp[-1].field); \
+    case ByteCodeInstruction::base:       sp[ 0] = fn(sp[ 0].field); \
+                      continue;
 
-#define VECTOR_UNARY_FN_VEC(base, fn)                     \
-    LABEL(base ## 4)                                      \
-    LABEL(base ## 3)                                      \
-    LABEL(base ## 2)                                      \
-    LABEL(base) {                                         \
-        int count = READ8();                              \
-        float* v = (float*)sp - count + 1;                \
-        for (int i = VecWidth * count; i > 0; --i, ++v) { \
-            *v = fn(*v);                                  \
-        }                                                 \
-        NEXT();                                           \
+#define VECTOR_UNARY_FN_VEC(base, fn) /* fn() applied to VecWidth * count floats */     \
+    case ByteCodeInstruction::base ## 4: /* opcodes descend: base##4 == base - 3 */     \
+    case ByteCodeInstruction::base ## 3:                                                \
+    case ByteCodeInstruction::base ## 2:                                                \
+    case ByteCodeInstruction::base: {                                                   \
+        int count = (int)(ByteCodeInstruction::base) - (int)inst + 1; /* width, 1..4 */ \
+        float* v = (float*)sp - count + 1;                                              \
+        for (int i = VecWidth * count; i > 0; --i, ++v) {                               \
+            *v = fn(*v);                                                                \
+        }                                                                               \
+        continue;                                                                       \
     }
 
-#define VECTOR_LABELS(base) \
-    &&base ## 4,            \
-    &&base ## 3,            \
-    &&base ## 2,            \
-    &&base
-
-#define VECTOR_MATRIX_LABELS(base) \
-    VECTOR_LABELS(base),           \
-    &&base ## N
-
-// If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction
-// does not match the order of the opcodes listed in the 'labels' array in innerRun().
-#define CHECK_LABEL(name) \
-    SkASSERT(labels[(int) ByteCodeInstruction::name] == &&name)
-
-#define CHECK_VECTOR_LABELS(name) \
-    CHECK_LABEL(name ## 4);       \
-    CHECK_LABEL(name ## 3);       \
-    CHECK_LABEL(name ## 2);       \
-    CHECK_LABEL(name)
-
-#define CHECK_VECTOR_MATRIX_LABELS(name) \
-    CHECK_VECTOR_LABELS(name);           \
-    CHECK_LABEL(name ## N)
-
 union VValue {
     VValue() {}
     VValue(F32 f) : fFloat(f) {}
@@ -556,198 +515,6 @@
 static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
                      float* outReturn[], VValue globals[], const float uniforms[],
                      bool stripedOutput, int N, int baseIndex) {
-#ifdef SKSLC_THREADED_CODE
-    static const void* labels[] = {
-        // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as values"
-        // extension. If you add anything to this array, be sure to add the corresponding
-        // CHECK_LABEL() or CHECK_*_LABELS() assert below.
-        VECTOR_MATRIX_LABELS(kAddF),
-        VECTOR_LABELS(kAddI),
-        &&kAndB,
-        &&kBranch,
-        &&kCall,
-        &&kCallExternal,
-        &&kClampIndex,
-        VECTOR_LABELS(kCompareIEQ),
-        VECTOR_LABELS(kCompareINEQ),
-        VECTOR_MATRIX_LABELS(kCompareFEQ),
-        VECTOR_MATRIX_LABELS(kCompareFNEQ),
-        VECTOR_LABELS(kCompareFGT),
-        VECTOR_LABELS(kCompareFGTEQ),
-        VECTOR_LABELS(kCompareFLT),
-        VECTOR_LABELS(kCompareFLTEQ),
-        VECTOR_LABELS(kCompareSGT),
-        VECTOR_LABELS(kCompareSGTEQ),
-        VECTOR_LABELS(kCompareSLT),
-        VECTOR_LABELS(kCompareSLTEQ),
-        VECTOR_LABELS(kCompareUGT),
-        VECTOR_LABELS(kCompareUGTEQ),
-        VECTOR_LABELS(kCompareULT),
-        VECTOR_LABELS(kCompareULTEQ),
-        VECTOR_LABELS(kConvertFtoI),
-        VECTOR_LABELS(kConvertStoF),
-        VECTOR_LABELS(kConvertUtoF),
-        VECTOR_LABELS(kCos),
-        VECTOR_MATRIX_LABELS(kDivideF),
-        VECTOR_LABELS(kDivideS),
-        VECTOR_LABELS(kDivideU),
-        VECTOR_MATRIX_LABELS(kDup),
-        &&kInverse2x2,
-        &&kInverse3x3,
-        &&kInverse4x4,
-        VECTOR_LABELS(kLoad),
-        VECTOR_LABELS(kLoadGlobal),
-        VECTOR_LABELS(kLoadUniform),
-        &&kLoadSwizzle,
-        &&kLoadSwizzleGlobal,
-        &&kLoadSwizzleUniform,
-        &&kLoadExtended,
-        &&kLoadExtendedGlobal,
-        &&kLoadExtendedUniform,
-        &&kMatrixToMatrix,
-        &&kMatrixMultiply,
-        VECTOR_MATRIX_LABELS(kNegateF),
-        VECTOR_LABELS(kNegateI),
-        VECTOR_MATRIX_LABELS(kMultiplyF),
-        VECTOR_LABELS(kMultiplyI),
-        &&kNotB,
-        &&kOrB,
-        VECTOR_MATRIX_LABELS(kPop),
-        &&kPushImmediate,
-        VECTOR_LABELS(kReadExternal),
-        VECTOR_LABELS(kRemainderF),
-        VECTOR_LABELS(kRemainderS),
-        VECTOR_LABELS(kRemainderU),
-        &&kReserve,
-        &&kReturn,
-        &&kScalarToMatrix,
-        &&kShiftLeft,
-        &&kShiftRightS,
-        &&kShiftRightU,
-        VECTOR_LABELS(kSin),
-        VECTOR_LABELS(kSqrt),
-        VECTOR_LABELS(kStore),
-        VECTOR_LABELS(kStoreGlobal),
-        &&kStoreExtended,
-        &&kStoreExtendedGlobal,
-        &&kStoreSwizzle,
-        &&kStoreSwizzleGlobal,
-        &&kStoreSwizzleIndirect,
-        &&kStoreSwizzleIndirectGlobal,
-        &&kSwizzle,
-        VECTOR_MATRIX_LABELS(kSubtractF),
-        VECTOR_LABELS(kSubtractI),
-        VECTOR_LABELS(kTan),
-        VECTOR_LABELS(kWriteExternal),
-        &&kXorB,
-
-        &&kMaskPush,
-        &&kMaskPop,
-        &&kMaskNegate,
-        &&kMaskBlend,
-        &&kBranchIfAllFalse,
-
-        &&kLoopBegin,
-        &&kLoopNext,
-        &&kLoopMask,
-        &&kLoopEnd,
-        &&kLoopBreak,
-        &&kLoopContinue,
-    };
-    // Verify that the order of the labels array matches the order of the ByteCodeInstruction enum.
-    CHECK_VECTOR_MATRIX_LABELS(kAddF);
-    CHECK_VECTOR_LABELS(kAddI);
-    CHECK_LABEL(kAndB);
-    CHECK_LABEL(kBranch);
-    CHECK_LABEL(kCall);
-    CHECK_LABEL(kCallExternal);
-    CHECK_LABEL(kClampIndex);
-    CHECK_VECTOR_LABELS(kCompareIEQ);
-    CHECK_VECTOR_LABELS(kCompareINEQ);
-    CHECK_VECTOR_MATRIX_LABELS(kCompareFEQ);
-    CHECK_VECTOR_MATRIX_LABELS(kCompareFNEQ);
-    CHECK_VECTOR_LABELS(kCompareFGT);
-    CHECK_VECTOR_LABELS(kCompareFGTEQ);
-    CHECK_VECTOR_LABELS(kCompareFLT);
-    CHECK_VECTOR_LABELS(kCompareFLTEQ);
-    CHECK_VECTOR_LABELS(kCompareSGT);
-    CHECK_VECTOR_LABELS(kCompareSGTEQ);
-    CHECK_VECTOR_LABELS(kCompareSLT);
-    CHECK_VECTOR_LABELS(kCompareSLTEQ);
-    CHECK_VECTOR_LABELS(kCompareUGT);
-    CHECK_VECTOR_LABELS(kCompareUGTEQ);
-    CHECK_VECTOR_LABELS(kCompareULT);
-    CHECK_VECTOR_LABELS(kCompareULTEQ);
-    CHECK_VECTOR_LABELS(kConvertFtoI);
-    CHECK_VECTOR_LABELS(kConvertStoF);
-    CHECK_VECTOR_LABELS(kConvertUtoF);
-    CHECK_VECTOR_LABELS(kCos);
-    CHECK_VECTOR_MATRIX_LABELS(kDivideF);
-    CHECK_VECTOR_LABELS(kDivideS);
-    CHECK_VECTOR_LABELS(kDivideU);
-    CHECK_VECTOR_MATRIX_LABELS(kDup);
-    CHECK_LABEL(kInverse2x2);
-    CHECK_LABEL(kInverse3x3);
-    CHECK_LABEL(kInverse4x4);
-    CHECK_VECTOR_LABELS(kLoad);
-    CHECK_VECTOR_LABELS(kLoadGlobal);
-    CHECK_VECTOR_LABELS(kLoadUniform);
-    CHECK_LABEL(kLoadSwizzle);
-    CHECK_LABEL(kLoadSwizzleGlobal);
-    CHECK_LABEL(kLoadSwizzleUniform);
-    CHECK_LABEL(kLoadExtended);
-    CHECK_LABEL(kLoadExtendedGlobal);
-    CHECK_LABEL(kLoadExtendedUniform);
-    CHECK_LABEL(kMatrixToMatrix);
-    CHECK_LABEL(kMatrixMultiply);
-    CHECK_VECTOR_MATRIX_LABELS(kNegateF);
-    CHECK_VECTOR_LABELS(kNegateI);
-    CHECK_VECTOR_MATRIX_LABELS(kMultiplyF);
-    CHECK_VECTOR_LABELS(kMultiplyI);
-    CHECK_LABEL(kNotB);
-    CHECK_LABEL(kOrB);
-    CHECK_VECTOR_MATRIX_LABELS(kPop);
-    CHECK_LABEL(kPushImmediate);
-    CHECK_VECTOR_LABELS(kReadExternal);
-    CHECK_VECTOR_LABELS(kRemainderF);
-    CHECK_VECTOR_LABELS(kRemainderS);
-    CHECK_VECTOR_LABELS(kRemainderU);
-    CHECK_LABEL(kReserve);
-    CHECK_LABEL(kReturn);
-    CHECK_LABEL(kScalarToMatrix);
-    CHECK_LABEL(kShiftLeft);
-    CHECK_LABEL(kShiftRightS);
-    CHECK_LABEL(kShiftRightU);
-    CHECK_VECTOR_LABELS(kSin);
-    CHECK_VECTOR_LABELS(kSqrt);
-    CHECK_VECTOR_LABELS(kStore);
-    CHECK_VECTOR_LABELS(kStoreGlobal);
-    CHECK_LABEL(kStoreExtended);
-    CHECK_LABEL(kStoreExtendedGlobal);
-    CHECK_LABEL(kStoreSwizzle);
-    CHECK_LABEL(kStoreSwizzleGlobal);
-    CHECK_LABEL(kStoreSwizzleIndirect);
-    CHECK_LABEL(kStoreSwizzleIndirectGlobal);
-    CHECK_LABEL(kSwizzle);
-    CHECK_VECTOR_MATRIX_LABELS(kSubtractF);
-    CHECK_VECTOR_LABELS(kSubtractI);
-    CHECK_VECTOR_LABELS(kTan);
-    CHECK_VECTOR_LABELS(kWriteExternal);
-    CHECK_LABEL(kXorB);
-    CHECK_LABEL(kMaskPush);
-    CHECK_LABEL(kMaskPop);
-    CHECK_LABEL(kMaskNegate);
-    CHECK_LABEL(kMaskBlend);
-    CHECK_LABEL(kBranchIfAllFalse);
-    CHECK_LABEL(kLoopBegin);
-    CHECK_LABEL(kLoopNext);
-    CHECK_LABEL(kLoopMask);
-    CHECK_LABEL(kLoopEnd);
-    CHECK_LABEL(kLoopBreak);
-    CHECK_LABEL(kLoopContinue);
-    f->fPreprocessOnce([f] { ((ByteCodeFunction*)f)->preprocess(labels); });
-#endif
-
     // Needs to be the first N non-negative integers, at least as large as VecWidth
     static const Interpreter::I32 gLanes = {
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
@@ -781,630 +548,606 @@
 
     auto mask = [&]() { return *maskPtr & *loopPtr; };
 
-#ifdef SKSLC_THREADED_CODE
-    // If the "labels as values" extension is available, we implement this using threaded code.
-    // Instead of opcodes, the code directly contains the addresses of the labels to jump to. Then
-    // the code for each opcode simply grabs the address of the next opcode and uses a goto to jump
-    // there.
-    NEXT();
-#else
-    // Otherwise, we have to use a switch statement and a loop to execute the right label.
     for (;;) {
-        #ifdef TRACE
-            printf("at %3d ", (int) (ip - code));
-            disassemble_instruction(ip);
-            printf(" (stack: %d)\n", (int) (sp - stack) + 1);
-        #endif
-        switch ((ByteCodeInstruction) READ16()) {
+#ifdef TRACE
+        printf("at %3d ", (int) (ip - code));
+        disassemble_instruction(ip);
+        printf(" (stack: %d)\n", (int) (sp - stack) + 1);
 #endif
+        ByteCodeInstruction inst = READ_INST();
+        switch (inst) {
 
-    VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
-    VECTOR_BINARY_OP(kAddI, fSigned, +)
+            VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
+            VECTOR_BINARY_OP(kAddI, fSigned, +)
 
-    // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
-    LABEL(kAndB)
-        sp[-1] = sp[-1].fSigned & sp[0].fSigned;
-        POP();
-        NEXT();
-    LABEL(kNotB)
-        sp[0] = ~sp[0].fSigned;
-        NEXT();
-    LABEL(kOrB)
-        sp[-1] = sp[-1].fSigned | sp[0].fSigned;
-        POP();
-        NEXT();
-    LABEL(kXorB)
-        sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
-        POP();
-        NEXT();
+            // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
+            case ByteCodeInstruction::kAndB:
+                sp[-1] = sp[-1].fSigned & sp[0].fSigned;
+                POP();
+                continue;
+            case ByteCodeInstruction::kNotB:
+                sp[0] = ~sp[0].fSigned;
+                continue;
+            case ByteCodeInstruction::kOrB:
+                sp[-1] = sp[-1].fSigned | sp[0].fSigned;
+                POP();
+                continue;
+            case ByteCodeInstruction::kXorB:
+                sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
+                POP();
+                continue;
 
-    LABEL(kBranch)
-        ip = code + READ16();
-        NEXT();
+            case ByteCodeInstruction::kBranch:
+                ip = code + READ16();
+                continue;
 
-    LABEL(kCall) {
-        // Precursor code reserved space for the return value, and pushed all parameters to
-        // the stack. Update our bottom of stack to point at the first parameter, and our
-        // sp to point past those parameters (plus space for locals).
-        int target = READ8();
-        const ByteCodeFunction* fun = byteCode->fFunctions[target].get();
-#ifdef SKSLC_THREADED_CODE
-        fun->fPreprocessOnce([fun] { ((ByteCodeFunction*)fun)->preprocess(labels); });
-#endif
-        if (skvx::any(mask())) {
-            frames.push_back({ code, ip, stack, fun->fParameterCount });
-            ip = code = fun->fCode.data();
-            stack = sp - fun->fParameterCount + 1;
-            sp = stack + fun->fParameterCount + fun->fLocalCount - 1;
-        }
-        NEXT();
-    }
-
-    LABEL(kCallExternal) {
-        CallExternal(byteCode, ip, sp, baseIndex, mask());
-        NEXT();
-    }
-
-    LABEL(kClampIndex) {
-        int length = READ8();
-        if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
-            return false;
-        }
-        NEXT();
-    }
-
-    VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
-    VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
-    VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
-    VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
-    VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
-    VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
-    VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
-    VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
-    VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
-    VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
-    VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
-    VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
-    VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
-    VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
-    VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
-    VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
-
-    LABEL(kConvertFtoI4) sp[-3] = skvx::cast<int>(sp[-3].fFloat);
-    LABEL(kConvertFtoI3) sp[-2] = skvx::cast<int>(sp[-2].fFloat);
-    LABEL(kConvertFtoI2) sp[-1] = skvx::cast<int>(sp[-1].fFloat);
-    LABEL(kConvertFtoI)  sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
-                         NEXT();
-
-    LABEL(kConvertStoF4) sp[-3] = skvx::cast<float>(sp[-3].fSigned);
-    LABEL(kConvertStoF3) sp[-2] = skvx::cast<float>(sp[-2].fSigned);
-    LABEL(kConvertStoF2) sp[-1] = skvx::cast<float>(sp[-1].fSigned);
-    LABEL(kConvertStoF)  sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
-                         NEXT();
-
-    LABEL(kConvertUtoF4) sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
-    LABEL(kConvertUtoF3) sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
-    LABEL(kConvertUtoF2) sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
-    LABEL(kConvertUtoF)  sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
-                         NEXT();
-
-    VECTOR_UNARY_FN_VEC(kCos, cosf)
-
-    VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
-    VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
-    VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
-
-    LABEL(kDup4) PUSH(sp[1 - ip[0]]);
-    LABEL(kDup3) PUSH(sp[1 - ip[0]]);
-    LABEL(kDup2) PUSH(sp[1 - ip[0]]);
-    LABEL(kDup)  PUSH(sp[1 - ip[0]]);
-                 ++ip;
-                 NEXT();
-
-    LABEL(kDupN) {
-        int count = READ8();
-        memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
-        sp += count;
-        NEXT();
-    }
-
-    LABEL(kInverse2x2) {
-        Inverse2x2(sp);
-        NEXT();
-    }
-    LABEL(kInverse3x3) {
-        Inverse3x3(sp);
-        NEXT();
-    }
-    LABEL(kInverse4x4) {
-        Inverse4x4(sp);
-        NEXT();
-    }
-
-    LABEL(kLoad4) sp[4] = stack[ip[1] + 3];
-    LABEL(kLoad3) sp[3] = stack[ip[1] + 2];
-    LABEL(kLoad2) sp[2] = stack[ip[1] + 1];
-    LABEL(kLoad)  sp[1] = stack[ip[1] + 0];
-                  sp += ip[0];
-                  ip += 2;
-                  NEXT();
-
-    LABEL(kLoadGlobal4) sp[4] = globals[ip[1] + 3];
-    LABEL(kLoadGlobal3) sp[3] = globals[ip[1] + 2];
-    LABEL(kLoadGlobal2) sp[2] = globals[ip[1] + 1];
-    LABEL(kLoadGlobal)  sp[1] = globals[ip[1] + 0];
-                        sp += ip[0];
-                        ip += 2;
-                        NEXT();
-
-    LABEL(kLoadUniform4) sp[4].fFloat = uniforms[ip[1] + 3];
-    LABEL(kLoadUniform3) sp[3].fFloat = uniforms[ip[1] + 2];
-    LABEL(kLoadUniform2) sp[2].fFloat = uniforms[ip[1] + 1];
-    LABEL(kLoadUniform)  sp[1].fFloat = uniforms[ip[1] + 0];
-                        sp += ip[0];
-                        ip += 2;
-                        NEXT();
-
-    LABEL(kLoadExtended) {
-        int count = READ8();
-        I32 src = POP().fSigned;
-        I32 m = mask();
-        for (int i = 0; i < count; ++i) {
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
+            case ByteCodeInstruction::kCall: {
+                // Precursor code reserved space for the return value, and pushed all parameters to
+                // the stack. Update our bottom of stack to point at the first parameter, and our
+                // sp to point past those parameters (plus space for locals).
+                int target = READ8();
+                const ByteCodeFunction* fun = byteCode->fFunctions[target].get();
+                if (skvx::any(mask())) {
+                    frames.push_back({ code, ip, stack, fun->fParameterCount });
+                    ip = code = fun->fCode.data();
+                    stack = sp - fun->fParameterCount + 1;
+                    sp = stack + fun->fParameterCount + fun->fLocalCount - 1;
                 }
+                continue;
             }
-        }
-        sp += count;
-        NEXT();
-    }
 
-    LABEL(kLoadExtendedGlobal) {
-        int count = READ8();
-        I32 src = POP().fSigned;
-        I32 m = mask();
-        for (int i = 0; i < count; ++i) {
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
+            case ByteCodeInstruction::kCallExternal: {
+                CallExternal(byteCode, ip, sp, baseIndex, mask());
+                continue;
+            }
+
+            case ByteCodeInstruction::kClampIndex: {
+                int length = READ8();
+                if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
+                    return false;
                 }
+                continue;
             }
-        }
-        sp += count;
-        NEXT();
-    }
 
-    LABEL(kLoadExtendedUniform) {
-        int count = READ8();
-        I32 src = POP().fSigned;
-        I32 m = mask();
-        for (int i = 0; i < count; ++i) {
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    sp[i + 1].fFloat[j] = uniforms[src[j] + i];
-                }
+            VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
+            VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
+            VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
+            VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
+            VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
+            VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
+            VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
+            VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
+            VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
+            VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
+            VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
+            VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
+            VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
+            VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
+            VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
+            VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
+
+            case ByteCodeInstruction::kConvertFtoI4: sp[-3] = skvx::cast<int>(sp[-3].fFloat);
+            case ByteCodeInstruction::kConvertFtoI3: sp[-2] = skvx::cast<int>(sp[-2].fFloat);
+            case ByteCodeInstruction::kConvertFtoI2: sp[-1] = skvx::cast<int>(sp[-1].fFloat);
+            case ByteCodeInstruction::kConvertFtoI:  sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
+                                                     continue;
+
+            case ByteCodeInstruction::kConvertStoF4: sp[-3] = skvx::cast<float>(sp[-3].fSigned);
+            case ByteCodeInstruction::kConvertStoF3: sp[-2] = skvx::cast<float>(sp[-2].fSigned);
+            case ByteCodeInstruction::kConvertStoF2: sp[-1] = skvx::cast<float>(sp[-1].fSigned);
+            case ByteCodeInstruction::kConvertStoF:  sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
+                                                     continue;
+
+            case ByteCodeInstruction::kConvertUtoF4: sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
+            case ByteCodeInstruction::kConvertUtoF3: sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
+            case ByteCodeInstruction::kConvertUtoF2: sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
+            case ByteCodeInstruction::kConvertUtoF:  sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
+                                                     continue;
+
+            VECTOR_UNARY_FN_VEC(kCos, cosf)
+
+            VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
+            VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
+            VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
+
+            case ByteCodeInstruction::kDup4: PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
+            case ByteCodeInstruction::kDup3: PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
+            case ByteCodeInstruction::kDup2: PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
+            case ByteCodeInstruction::kDup : PUSH(sp[(int)inst - (int)ByteCodeInstruction::kDup]);
+                                             continue;  // offset is 1 - count (<= 0)
+
+            case ByteCodeInstruction::kDupN: {
+                int count = READ8();
+                memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
+                sp += count;
+                continue;
             }
-        }
-        sp += count;
-        NEXT();
-    }
 
-    LABEL(kLoadSwizzle) {
-        int src = READ8();
-        int count = READ8();
-        for (int i = 0; i < count; ++i) {
-            PUSH(stack[src + *(ip + i)]);
-        }
-        ip += count;
-        NEXT();
-    }
+            case ByteCodeInstruction::kInverse2x2:
+                Inverse2x2(sp);
+                continue;
+            case ByteCodeInstruction::kInverse3x3:
+                Inverse3x3(sp);
+                continue;
+            case ByteCodeInstruction::kInverse4x4:
+                Inverse4x4(sp);
+                continue;
 
-    LABEL(kLoadSwizzleGlobal) {
-        int src = READ8();
-        int count = READ8();
-        for (int i = 0; i < count; ++i) {
-            PUSH(globals[src + *(ip + i)]);
-        }
-        ip += count;
-        NEXT();
-    }
+            case ByteCodeInstruction::kLoad4: sp[4] = stack[*ip + 3];
+            case ByteCodeInstruction::kLoad3: sp[3] = stack[*ip + 2];
+            case ByteCodeInstruction::kLoad2: sp[2] = stack[*ip + 1];
+            case ByteCodeInstruction::kLoad:  sp[1] = stack[*ip + 0];
+                        ++ip;
+                        sp += (int)ByteCodeInstruction::kLoad - (int)inst + 1;
+                        continue;
 
-    LABEL(kLoadSwizzleUniform) {
-        int src = READ8();
-        int count = READ8();
-        for (int i = 0; i < count; ++i) {
-            PUSH(F32(uniforms[src + *(ip + i)]));
-        }
-        ip += count;
-        NEXT();
-    }
+            case ByteCodeInstruction::kLoadGlobal4: sp[4] = globals[*ip + 3];
+            case ByteCodeInstruction::kLoadGlobal3: sp[3] = globals[*ip + 2];
+            case ByteCodeInstruction::kLoadGlobal2: sp[2] = globals[*ip + 1];
+            case ByteCodeInstruction::kLoadGlobal:  sp[1] = globals[*ip + 0];
+                                ++ip;
+                                sp += (int)ByteCodeInstruction::kLoadGlobal - (int)inst + 1;
+                                continue;
 
-    LABEL(kMatrixToMatrix) {
-        int srcCols = READ8();
-        int srcRows = READ8();
-        int dstCols = READ8();
-        int dstRows = READ8();
-        SkASSERT(srcCols >= 2 && srcCols <= 4);
-        SkASSERT(srcRows >= 2 && srcRows <= 4);
-        SkASSERT(dstCols >= 2 && dstCols <= 4);
-        SkASSERT(dstRows >= 2 && dstRows <= 4);
-        F32 tmp[16];
-        memset(tmp, 0, sizeof(tmp));
-        tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
-        for (int c = srcCols - 1; c >= 0; --c) {
-            for (int r = srcRows - 1; r >= 0; --r) {
-                tmp[c*4 + r] = POP().fFloat;
-            }
-        }
-        for (int c = 0; c < dstCols; ++c) {
-            for (int r = 0; r < dstRows; ++r) {
-                PUSH(tmp[c*4 + r]);
-            }
-        }
-        NEXT();
-    }
+            case ByteCodeInstruction::kLoadUniform4: sp[4].fFloat = uniforms[*ip + 3];
+            case ByteCodeInstruction::kLoadUniform3: sp[3].fFloat = uniforms[*ip + 2];
+            case ByteCodeInstruction::kLoadUniform2: sp[2].fFloat = uniforms[*ip + 1];
+            case ByteCodeInstruction::kLoadUniform:  sp[1].fFloat = uniforms[*ip + 0];
+                                ++ip;
+                                sp += (int)ByteCodeInstruction::kLoadUniform - (int)inst + 1;
+                                continue;
 
-    LABEL(kMatrixMultiply) {
-        int lCols = READ8();
-        int lRows = READ8();
-        int rCols = READ8();
-        int rRows = lCols;
-        F32 tmp[16] = { 0.0f };
-        F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
-        F32* A = B - (lCols * lRows);
-        for (int c = 0; c < rCols; ++c) {
-            for (int r = 0; r < lRows; ++r) {
-                for (int j = 0; j < lCols; ++j) {
-                    tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
-                }
-            }
-        }
-        sp -= (lCols * lRows) + (rCols * rRows);
-        memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
-        sp += (rCols * lRows);
-        NEXT();
-    }
-
-    VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
-    VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
-
-    LABEL(kNegateF4) sp[-3] = -sp[-3].fFloat;
-    LABEL(kNegateF3) sp[-2] = -sp[-2].fFloat;
-    LABEL(kNegateF2) sp[-1] = -sp[-1].fFloat;
-    LABEL(kNegateF)  sp[ 0] = -sp[ 0].fFloat;
-                     NEXT();
-
-    LABEL(kNegateFN) {
-        int count = READ8();
-        for (int i = count - 1; i >= 0; --i) {
-            sp[-i] = -sp[-i].fFloat;
-        }
-        NEXT();
-    }
-
-    LABEL(kNegateI4) sp[-3] = -sp[-3].fSigned;
-    LABEL(kNegateI3) sp[-2] = -sp[-2].fSigned;
-    LABEL(kNegateI2) sp[-1] = -sp[-1].fSigned;
-    LABEL(kNegateI)  sp[ 0] = -sp[ 0].fSigned;
-                     NEXT();
-
-    LABEL(kPop4) POP();
-    LABEL(kPop3) POP();
-    LABEL(kPop2) POP();
-    LABEL(kPop)  POP();
-                 NEXT();
-
-    LABEL(kPopN)
-        sp -= READ8();
-        NEXT();
-
-    LABEL(kPushImmediate)
-        PUSH(U32(READ32()));
-        NEXT();
-
-    LABEL(kReadExternal)
-    LABEL(kReadExternal2)
-    LABEL(kReadExternal3)
-    LABEL(kReadExternal4) {
-        int count = READ8();
-        int src = READ8();
-        float tmp[4];
-        I32 m = mask();
-        for (int i = 0; i < VecWidth; ++i) {
-            if (m[i]) {
-                byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
-                for (int j = 0; j < count; ++j) {
-                    sp[j + 1].fFloat[i] = tmp[j];
-                }
-            }
-        }
-        sp += count;
-        NEXT();
-    }
-
-    VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
-    VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
-    VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
-
-    LABEL(kReserve)
-        sp += READ8();
-        NEXT();
-
-    LABEL(kReturn) {
-        int count = READ8();
-        if (frames.empty()) {
-            if (outReturn) {
-                VValue* src = sp - count + 1;
-                if (stripedOutput) {
-                    for (int i = 0; i < count; ++i) {
-                        memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
-                        ++src;
-                    }
-                } else {
-                    float* outPtr = outReturn[0];
-                    for (int i = 0; i < count; ++i) {
-                        for (int j = 0; j < N; ++j) {
-                            outPtr[count * j] = src->fFloat[j];
+            case ByteCodeInstruction::kLoadExtended: {
+                int count = READ8();
+                I32 src = POP().fSigned;
+                I32 m = mask();
+                for (int i = 0; i < count; ++i) {
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {
+                            sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
                         }
-                        ++outPtr;
-                        ++src;
                     }
                 }
+                sp += count;
+                continue;
             }
-            return true;
-        } else {
-            // When we were called, the caller reserved stack space for their copy of our
-            // return value, then 'stack' was positioned after that, where our parameters
-            // were placed. Copy our return values to their reserved area.
-            memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
 
-            // Now move the stack pointer to the end of the passed-in parameters. This odd
-            // calling convention requires the caller to pop the arguments after calling,
-            // but allows them to store any out-parameters back during that unwinding.
-            // After that sequence finishes, the return value will be the top of the stack.
-            const StackFrame& frame(frames.back());
-            sp = stack + frame.fParameterCount - 1;
-            stack = frame.fStack;
-            code = frame.fCode;
-            ip = frame.fIP;
-            frames.pop_back();
-            NEXT();
-        }
-    }
-
-    LABEL(kScalarToMatrix) {
-        int cols = READ8();
-        int rows = READ8();
-        VValue v = POP();
-        for (int c = 0; c < cols; ++c) {
-            for (int r = 0; r < rows; ++r) {
-                PUSH(c == r ? v : F32(0.0f));
+            case ByteCodeInstruction::kLoadExtendedGlobal: {
+                int count = READ8();
+                I32 src = POP().fSigned;
+                I32 m = mask();
+                for (int i = 0; i < count; ++i) {
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {
+                            sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
+                        }
+                    }
+                }
+                sp += count;
+                continue;
             }
-        }
-        NEXT();
-    }
 
-    LABEL(kShiftLeft)
-        sp[0] = sp[0].fSigned << READ8();
-        NEXT();
-    LABEL(kShiftRightS)
-        sp[0] = sp[0].fSigned >> READ8();
-        NEXT();
-    LABEL(kShiftRightU)
-        sp[0] = sp[0].fUnsigned >> READ8();
-        NEXT();
+            case ByteCodeInstruction::kLoadExtendedUniform: {
+                int count = READ8();
+                I32 src = POP().fSigned;
+                I32 m = mask();
+                for (int i = 0; i < count; ++i) {
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {
+                            sp[i + 1].fFloat[j] = uniforms[src[j] + i];
+                        }
+                    }
+                }
+                sp += count;
+                continue;
+            }
 
-    VECTOR_UNARY_FN_VEC(kSin, sinf)
-    VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
+            case ByteCodeInstruction::kLoadSwizzle: {
+                int src = READ8();
+                int count = READ8();
+                for (int i = 0; i < count; ++i) {
+                    PUSH(stack[src + *(ip + i)]);
+                }
+                ip += count;
+                continue;
+            }
 
-    LABEL(kStore4)
-        stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
-    LABEL(kStore3)
-        stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
-    LABEL(kStore2)
-        stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
-    LABEL(kStore)
-        stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
-        ++ip;
-        NEXT();
+            case ByteCodeInstruction::kLoadSwizzleGlobal: {
+                int src = READ8();
+                int count = READ8();
+                for (int i = 0; i < count; ++i) {
+                    PUSH(globals[src + *(ip + i)]);
+                }
+                ip += count;
+                continue;
+            }
 
-    LABEL(kStoreGlobal4)
-        globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
-    LABEL(kStoreGlobal3)
-        globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
-    LABEL(kStoreGlobal2)
-        globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
-    LABEL(kStoreGlobal)
-        globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
-        ++ip;
-        NEXT();
+            case ByteCodeInstruction::kLoadSwizzleUniform: {
+                int src = READ8();
+                int count = READ8();
+                for (int i = 0; i < count; ++i) {
+                    PUSH(F32(uniforms[src + *(ip + i)]));
+                }
+                ip += count;
+                continue;
+            }
 
-    LABEL(kStoreExtended) {
-        int count = READ8();
-        I32 target = POP().fSigned;
-        VValue* src = sp - count + 1;
-        I32 m = mask();
-        for (int i = 0; i < count; ++i) {
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
+            case ByteCodeInstruction::kMatrixToMatrix: {
+                int srcCols = READ8();
+                int srcRows = READ8();
+                int dstCols = READ8();
+                int dstRows = READ8();
+                SkASSERT(srcCols >= 2 && srcCols <= 4);
+                SkASSERT(srcRows >= 2 && srcRows <= 4);
+                SkASSERT(dstCols >= 2 && dstCols <= 4);
+                SkASSERT(dstRows >= 2 && dstRows <= 4);
+                F32 tmp[16];
+                memset(tmp, 0, sizeof(tmp));
+                tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
+                for (int c = srcCols - 1; c >= 0; --c) {
+                    for (int r = srcRows - 1; r >= 0; --r) {
+                        tmp[c*4 + r] = POP().fFloat;
+                    }
+                }
+                for (int c = 0; c < dstCols; ++c) {
+                    for (int r = 0; r < dstRows; ++r) {
+                        PUSH(tmp[c*4 + r]);
+                    }
+                }
+                continue;
+            }
+
+            case ByteCodeInstruction::kMatrixMultiply: {
+                int lCols = READ8();
+                int lRows = READ8();
+                int rCols = READ8();
+                int rRows = lCols;
+                F32 tmp[16] = { 0.0f };
+                F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
+                F32* A = B - (lCols * lRows);
+                for (int c = 0; c < rCols; ++c) {
+                    for (int r = 0; r < lRows; ++r) {
+                        for (int j = 0; j < lCols; ++j) {
+                            tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
+                        }
+                    }
+                }
+                sp -= (lCols * lRows) + (rCols * rRows);
+                memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
+                sp += (rCols * lRows);
+                continue;
+            }
+
+            VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
+            VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
+
+            case ByteCodeInstruction::kNegateF4: sp[-3] = -sp[-3].fFloat;
+            case ByteCodeInstruction::kNegateF3: sp[-2] = -sp[-2].fFloat;
+            case ByteCodeInstruction::kNegateF2: sp[-1] = -sp[-1].fFloat;
+            case ByteCodeInstruction::kNegateF:  sp[ 0] = -sp[ 0].fFloat;
+                                                 continue;
+
+            case ByteCodeInstruction::kNegateFN: {
+                int count = READ8();
+                for (int i = count - 1; i >= 0; --i) {
+                    sp[-i] = -sp[-i].fFloat;
+                }
+                continue;
+            }
+
+            case ByteCodeInstruction::kNegateI4: sp[-3] = -sp[-3].fSigned;
+            case ByteCodeInstruction::kNegateI3: sp[-2] = -sp[-2].fSigned;
+            case ByteCodeInstruction::kNegateI2: sp[-1] = -sp[-1].fSigned;
+            case ByteCodeInstruction::kNegateI:  sp[ 0] = -sp[ 0].fSigned;
+                                                 continue;
+
+            case ByteCodeInstruction::kPop4: POP();
+            case ByteCodeInstruction::kPop3: POP();
+            case ByteCodeInstruction::kPop2: POP();
+            case ByteCodeInstruction::kPop:  POP();
+                                             continue;
+
+            case ByteCodeInstruction::kPopN:
+                sp -= READ8();
+                continue;
+
+            case ByteCodeInstruction::kPushImmediate:
+                PUSH(U32(READ32()));
+                continue;
+
+            case ByteCodeInstruction::kReadExternal:
+            case ByteCodeInstruction::kReadExternal2:
+            case ByteCodeInstruction::kReadExternal3:
+            case ByteCodeInstruction::kReadExternal4: {
+                int count = (int)ByteCodeInstruction::kReadExternal - (int)inst + 1;
+                int src = READ8();
+                float tmp[4];
+                I32 m = mask();
+                for (int i = 0; i < VecWidth; ++i) {
+                    if (m[i]) {
+                        byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
+                        for (int j = 0; j < count; ++j) {
+                            sp[j + 1].fFloat[i] = tmp[j];
+                        }
+                    }
+                }
+                sp += count;
+                continue;
+            }
+
+            VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
+            VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
+            VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
+
+            case ByteCodeInstruction::kReserve:
+                sp += READ8();
+                continue;
+
+            case ByteCodeInstruction::kReturn: {
+                int count = READ8();
+                if (frames.empty()) {
+                    if (outReturn) {
+                        VValue* src = sp - count + 1;
+                        if (stripedOutput) {
+                            for (int i = 0; i < count; ++i) {
+                                memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
+                                ++src;
+                            }
+                        } else {
+                            float* outPtr = outReturn[0];
+                            for (int i = 0; i < count; ++i) {
+                                for (int j = 0; j < N; ++j) {
+                                    outPtr[count * j] = src->fFloat[j];
+                                }
+                                ++outPtr;
+                                ++src;
+                            }
+                        }
+                    }
+                    return true;
+                } else {
+                    // When we were called, the caller reserved stack space for their copy of our
+                    // return value, then 'stack' was positioned after that, where our parameters
+                    // were placed. Copy our return values to their reserved area.
+                    memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
+
+                    // Now move the stack pointer to the end of the passed-in parameters. This odd
+                    // calling convention requires the caller to pop the arguments after calling,
+                    // but allows them to store any out-parameters back during that unwinding.
+                    // After that sequence finishes, the return value will be the top of the stack.
+                    const StackFrame& frame(frames.back());
+                    sp = stack + frame.fParameterCount - 1;
+                    stack = frame.fStack;
+                    code = frame.fCode;
+                    ip = frame.fIP;
+                    frames.pop_back();
+                    continue;
                 }
             }
-        }
-        sp -= count;
-        NEXT();
-    }
-    LABEL(kStoreExtendedGlobal) {
-        int count = READ8();
-        I32 target = POP().fSigned;
-        VValue* src = sp - count + 1;
-        I32 m = mask();
-        for (int i = 0; i < count; ++i) {
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
+
+            case ByteCodeInstruction::kScalarToMatrix: {
+                int cols = READ8();  // first operand byte: column count
+                int rows = READ8();  // second operand byte: row count
+                VValue v = POP();  // the scalar to expand
+                for (int c = 0; c < cols; ++c) {  // pushed one column at a time
+                    for (int r = 0; r < rows; ++r) {
+                        PUSH(c == r ? v : F32(0.0f));  // v on the diagonal, 0.0f elsewhere
+                    }
                 }
+                continue;
+            }
+
+            case ByteCodeInstruction::kShiftLeft:
+                sp[0] = sp[0].fSigned << READ8();  // in place; shift count is the next code byte
+                continue;
+            case ByteCodeInstruction::kShiftRightS:
+                sp[0] = sp[0].fSigned >> READ8();  // shifts the fSigned view of sp[0]
+                continue;
+            case ByteCodeInstruction::kShiftRightU:
+                sp[0] = sp[0].fUnsigned >> READ8();  // shifts the fUnsigned view of sp[0]
+                continue;
+
+            VECTOR_UNARY_FN_VEC(kSin, sinf)
+            VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
+
+            case ByteCodeInstruction::kStore4:  // masked store to slot *ip+3, then falls through
+                stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
+            case ByteCodeInstruction::kStore3:  // intentional fall-through: slot *ip+2
+                stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
+            case ByteCodeInstruction::kStore2:  // intentional fall-through: slot *ip+1
+                stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
+            case ByteCodeInstruction::kStore:  // slot *ip+0; every entry point ends here
+                stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
+                ++ip;  // consume the slot-index byte that the stores above only peeked at
+                continue;
+
+            case ByteCodeInstruction::kStoreGlobal4:  // masked store to globals[*ip+3], falls through
+                globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
+            case ByteCodeInstruction::kStoreGlobal3:  // intentional fall-through: slot *ip+2
+                globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
+            case ByteCodeInstruction::kStoreGlobal2:  // intentional fall-through: slot *ip+1
+                globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
+            case ByteCodeInstruction::kStoreGlobal:  // slot *ip+0; every entry point ends here
+                globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
+                ++ip;  // consume the slot-index byte that the stores above only peeked at
+                continue;
+
+            case ByteCodeInstruction::kStoreExtended: {
+                int count = READ8();  // number of consecutive slots to store
+                I32 target = POP().fSigned;  // per-lane index of the first destination slot
+                VValue* src = sp - count + 1;  // start of the 'count' values ending at sp
+                I32 m = mask();
+                for (int i = 0; i < count; ++i) {
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {  // only lanes enabled in the mask are written
+                            stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
+                        }
+                    }
+                }
+                sp -= count;  // drop the values that were just stored
+                continue;
+            }
+            case ByteCodeInstruction::kStoreExtendedGlobal: {
+                int count = READ8();  // number of consecutive global slots to store
+                I32 target = POP().fSigned;  // per-lane index of the first destination slot
+                VValue* src = sp - count + 1;  // start of the 'count' values ending at sp
+                I32 m = mask();
+                for (int i = 0; i < count; ++i) {
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {  // only lanes enabled in the mask are written
+                            globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
+                        }
+                    }
+                }
+                sp -= count;  // drop the values that were just stored
+                continue;
+            }
+
+            case ByteCodeInstruction::kStoreSwizzle: {
+                int target = READ8();  // base slot in 'stack'
+                int count = READ8();  // component count; 'count' offset bytes follow at ip
+                for (int i = count - 1; i >= 0; --i) {  // last component first, one POP each
+                    stack[target + *(ip + i)] = skvx::if_then_else(
+                            mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
+                }
+                ip += count;  // step over the offset bytes
+                continue;
+            }
+
+            case ByteCodeInstruction::kStoreSwizzleGlobal: {
+                int target = READ8();  // base slot in 'globals'
+                int count = READ8();  // component count; 'count' offset bytes follow at ip
+                for (int i = count - 1; i >= 0; --i) {  // last component first, one POP each
+                    globals[target + *(ip + i)] = skvx::if_then_else(
+                            mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
+                }
+                ip += count;  // step over the offset bytes
+                continue;
+            }
+
+            case ByteCodeInstruction::kStoreSwizzleIndirect: {
+                int count = READ8();  // component count; 'count' offset bytes follow at ip
+                I32 target = POP().fSigned;  // per-lane base slot, taken from the stack
+                I32 m = mask();
+                for (int i = count - 1; i >= 0; --i) {  // last component first
+                    I32 v = POP().fSigned;
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {  // only lanes enabled in the mask are written
+                            stack[target[j] + *(ip + i)].fSigned[j] = v[j];
+                        }
+                    }
+                }
+                ip += count;  // step over the offset bytes
+                continue;
+            }
+
+            case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
+                int count = READ8();  // component count; 'count' offset bytes follow at ip
+                I32 target = POP().fSigned;  // per-lane base slot, taken from the stack
+                I32 m = mask();
+                for (int i = count - 1; i >= 0; --i) {  // last component first
+                    I32 v = POP().fSigned;
+                    for (int j = 0; j < VecWidth; ++j) {
+                        if (m[j]) {  // only lanes enabled in the mask are written
+                            globals[target[j] + *(ip + i)].fSigned[j] = v[j];
+                        }
+                    }
+                }
+                ip += count;  // step over the offset bytes
+                continue;
+            }
+
+            VECTOR_BINARY_OP(kSubtractI, fSigned, -)
+            VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
+
+            case ByteCodeInstruction::kSwizzle: {
+                VValue tmp[4];  // popped input components; sizes are not checked against 4 here
+                for (int i = READ8() - 1; i >= 0; --i) {  // operand byte: input count
+                    tmp[i] = POP();
+                }
+                for (int i = READ8() - 1; i >= 0; --i) {  // operand byte: output count
+                    PUSH(tmp[READ8()]);  // one index byte per output picks the input component
+                }
+                continue;
+            }
+
+            VECTOR_UNARY_FN_VEC(kTan, tanf)
+
+            case ByteCodeInstruction::kWriteExternal4:  // NOTE(review): 'count' below assumes the
+            case ByteCodeInstruction::kWriteExternal3:  // 4/3/2/1 opcodes are consecutive enum
+            case ByteCodeInstruction::kWriteExternal2:  // values with kWriteExternal last; confirm
+            case ByteCodeInstruction::kWriteExternal: {  // against the enum declaration
+                int count = (int)ByteCodeInstruction::kWriteExternal - (int)inst + 1;
+                int target = READ8();  // index into byteCode->fExternalValues
+                float tmp[4];  // one lane's worth of values handed to write()
+                I32 m = mask();
+                sp -= count;  // popped up front; the values are still read below via sp[j + 1]
+                for (int i = 0; i < VecWidth; ++i) {
+                    if (m[i]) {  // write() is only called for lanes enabled in the mask
+                        for (int j = 0; j < count; ++j) {
+                            tmp[j] = sp[j + 1].fFloat[i];
+                        }
+                        byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
+                    }
+                }
+                continue;
+            }
+
+            case ByteCodeInstruction::kMaskPush:
+                condPtr[1] = POP().fSigned;  // popped condition goes in the next cond slot
+                maskPtr[1] = maskPtr[0] & condPtr[1];  // new mask = current mask AND condition
+                ++condPtr; ++maskPtr;
+                continue;
+            case ByteCodeInstruction::kMaskPop:
+                --condPtr; --maskPtr;  // back to the enclosing condition/mask entries
+                continue;
+            case ByteCodeInstruction::kMaskNegate:
+                maskPtr[0] = maskPtr[-1] & ~condPtr[0];  // enclosing mask AND NOT condition
+                continue;
+            case ByteCodeInstruction::kMaskBlend: {
+                int count = READ8();  // number of values in each of the two stack groups
+                I32 m = condPtr[0];  // read the condition before its entry is popped
+                --condPtr; --maskPtr;
+                for (int i = 0; i < count; ++i) {
+                    sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
+                    --sp;  // the upper group is consumed as it is merged into the lower one
+                }
+                continue;
+            }
+            case ByteCodeInstruction::kBranchIfAllFalse: {
+                int target = READ16();  // 16-bit offset from 'code'
+                if (!skvx::any(mask())) {  // taken only when no lane of mask() is set
+                    ip = code + target;
+                }
+                continue;
+            }
+
+            case ByteCodeInstruction::kLoopBegin:
+                contPtr[1] = 0;  // new loop level starts with an empty continue mask
+                loopPtr[1] = loopPtr[0];  // and inherits the current loop mask
+                ++contPtr; ++loopPtr;
+                continue;
+            case ByteCodeInstruction::kLoopNext:
+                *loopPtr |= *contPtr;  // lanes recorded by kLoopContinue rejoin the loop mask
+                *contPtr = 0;
+                continue;
+            case ByteCodeInstruction::kLoopMask:
+                *loopPtr &= POP().fSigned;  // narrow the loop mask by the popped value
+                continue;
+            case ByteCodeInstruction::kLoopEnd:
+                --contPtr; --loopPtr;  // discard this loop level's entries
+                continue;
+            case ByteCodeInstruction::kLoopBreak:
+                *loopPtr &= ~mask();  // lanes set in mask() are cleared from the loop mask
+                continue;
+            case ByteCodeInstruction::kLoopContinue: {
+                I32 m = mask();
+                *contPtr |=  m;  // remember these lanes so kLoopNext can restore them
+                *loopPtr &= ~m;  // and clear them from the loop mask until then
+                continue;
             }
         }
-        sp -= count;
-        NEXT();
     }
-
-    LABEL(kStoreSwizzle) {
-        int target = READ8();
-        int count = READ8();
-        for (int i = count - 1; i >= 0; --i) {
-            stack[target + *(ip + i)] = skvx::if_then_else(
-                    mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
-        }
-        ip += count;
-        NEXT();
-    }
-
-    LABEL(kStoreSwizzleGlobal) {
-        int target = READ8();
-        int count = READ8();
-        for (int i = count - 1; i >= 0; --i) {
-            globals[target + *(ip + i)] = skvx::if_then_else(
-                    mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
-        }
-        ip += count;
-        NEXT();
-    }
-
-    LABEL(kStoreSwizzleIndirect) {
-        int count = READ8();
-        I32 target = POP().fSigned;
-        I32 m = mask();
-        for (int i = count - 1; i >= 0; --i) {
-            I32 v = POP().fSigned;
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    stack[target[j] + *(ip + i)].fSigned[j] = v[j];
-                }
-            }
-        }
-        ip += count;
-        NEXT();
-    }
-
-    LABEL(kStoreSwizzleIndirectGlobal) {
-        int count = READ8();
-        I32 target = POP().fSigned;
-        I32 m = mask();
-        for (int i = count - 1; i >= 0; --i) {
-            I32 v = POP().fSigned;
-            for (int j = 0; j < VecWidth; ++j) {
-                if (m[j]) {
-                    globals[target[j] + *(ip + i)].fSigned[j] = v[j];
-                }
-            }
-        }
-        ip += count;
-        NEXT();
-    }
-
-    VECTOR_BINARY_OP(kSubtractI, fSigned, -)
-    VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
-
-    LABEL(kSwizzle) {
-        VValue tmp[4];
-        for (int i = READ8() - 1; i >= 0; --i) {
-            tmp[i] = POP();
-        }
-        for (int i = READ8() - 1; i >= 0; --i) {
-            PUSH(tmp[READ8()]);
-        }
-        NEXT();
-    }
-
-    VECTOR_UNARY_FN_VEC(kTan, tanf)
-
-    LABEL(kWriteExternal4)
-    LABEL(kWriteExternal3)
-    LABEL(kWriteExternal2)
-    LABEL(kWriteExternal) {
-        int count = READ8();
-        int target = READ8();
-        float tmp[4];
-        I32 m = mask();
-        sp -= count;
-        for (int i = 0; i < VecWidth; ++i) {
-            if (m[i]) {
-                for (int j = 0; j < count; ++j) {
-                    tmp[j] = sp[j + 1].fFloat[i];
-                }
-                byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
-            }
-        }
-        NEXT();
-    }
-
-    LABEL(kMaskPush)
-        condPtr[1] = POP().fSigned;
-        maskPtr[1] = maskPtr[0] & condPtr[1];
-        ++condPtr; ++maskPtr;
-        NEXT();
-    LABEL(kMaskPop)
-        --condPtr; --maskPtr;
-        NEXT();
-    LABEL(kMaskNegate)
-        maskPtr[0] = maskPtr[-1] & ~condPtr[0];
-        NEXT();
-    LABEL(kMaskBlend) {
-        int count = READ8();
-        I32 m = condPtr[0];
-        --condPtr; --maskPtr;
-        for (int i = 0; i < count; ++i) {
-            sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
-            --sp;
-        }
-        NEXT();
-    }
-    LABEL(kBranchIfAllFalse) {
-        int target = READ16();
-        if (!skvx::any(mask())) {
-            ip = code + target;
-        }
-        NEXT();
-    }
-
-    LABEL(kLoopBegin)
-        contPtr[1] = 0;
-        loopPtr[1] = loopPtr[0];
-        ++contPtr; ++loopPtr;
-        NEXT();
-    LABEL(kLoopNext)
-        *loopPtr |= *contPtr;
-        *contPtr = 0;
-        NEXT();
-    LABEL(kLoopMask)
-        *loopPtr &= POP().fSigned;
-        NEXT();
-    LABEL(kLoopEnd)
-        --contPtr; --loopPtr;
-        NEXT();
-    LABEL(kLoopBreak)
-        *loopPtr &= ~mask();
-        NEXT();
-    LABEL(kLoopContinue) {
-        I32 m = mask();
-        *contPtr |=  m;
-        *loopPtr &= ~m;
-        NEXT();
-    }
-#ifdef SKSLC_THREADED_CODE
-    #ifdef TRACE
-        next:
-            printf("at %3d (stack: %d) (disable threaded code for disassembly)\n",
-                   (int) (ip - code), (int) (sp - stack) + 1);
-            goto *READ_INST();
-    #endif
-#else
-        }
-    }
-#endif
 }
 
 }; // class Interpreter
@@ -1424,208 +1167,6 @@
 #endif
 }
 
-#define VECTOR_PREPROCESS(base)          \
-    case ByteCodeInstruction::base ## 4: \
-    case ByteCodeInstruction::base ## 3: \
-    case ByteCodeInstruction::base ## 2: \
-    case ByteCodeInstruction::base: READ8(); break;
-
-#define VECTOR_PREPROCESS_NO_COUNT(base) \
-    case ByteCodeInstruction::base ## 4: \
-    case ByteCodeInstruction::base ## 3: \
-    case ByteCodeInstruction::base ## 2: \
-    case ByteCodeInstruction::base: break;
-
-#define VECTOR_MATRIX_PREPROCESS(base) \
-    VECTOR_PREPROCESS(base)            \
-    case ByteCodeInstruction::base ## N: READ8(); break;
-
-#define VECTOR_MATRIX_PREPROCESS_NO_COUNT(base) \
-    VECTOR_PREPROCESS_NO_COUNT(base)            \
-    case ByteCodeInstruction::base ## N: READ8(); break;
-
-void ByteCodeFunction::preprocess(const void* labels[]) {
-#if defined(SK_ENABLE_SKSL_INTERPRETER)
-#ifdef TRACE
-    this->disassemble();
-#endif
-    uint8_t* ip = fCode.data();
-    while (ip < fCode.data() + fCode.size()) {
-        ByteCodeInstruction inst = (ByteCodeInstruction) (intptr_t) READ_INST();
-        const void* label = labels[(int) inst];
-        memcpy(ip - sizeof(instruction), &label, sizeof(label));
-        switch (inst) {
-            VECTOR_MATRIX_PREPROCESS(kAddF)
-            VECTOR_PREPROCESS(kAddI)
-            case ByteCodeInstruction::kAndB: break;
-            case ByteCodeInstruction::kBranch: READ16(); break;
-            case ByteCodeInstruction::kCall: READ8(); break;
-            case ByteCodeInstruction::kCallExternal: {
-                READ8();
-                READ8();
-                READ8();
-                break;
-            }
-            case ByteCodeInstruction::kClampIndex: READ8(); break;
-            VECTOR_PREPROCESS(kCompareIEQ)
-            VECTOR_PREPROCESS(kCompareINEQ)
-            VECTOR_MATRIX_PREPROCESS(kCompareFEQ)
-            VECTOR_MATRIX_PREPROCESS(kCompareFNEQ)
-            VECTOR_PREPROCESS(kCompareFGT)
-            VECTOR_PREPROCESS(kCompareFGTEQ)
-            VECTOR_PREPROCESS(kCompareFLT)
-            VECTOR_PREPROCESS(kCompareFLTEQ)
-            VECTOR_PREPROCESS(kCompareSGT)
-            VECTOR_PREPROCESS(kCompareSGTEQ)
-            VECTOR_PREPROCESS(kCompareSLT)
-            VECTOR_PREPROCESS(kCompareSLTEQ)
-            VECTOR_PREPROCESS(kCompareUGT)
-            VECTOR_PREPROCESS(kCompareUGTEQ)
-            VECTOR_PREPROCESS(kCompareULT)
-            VECTOR_PREPROCESS(kCompareULTEQ)
-            VECTOR_PREPROCESS_NO_COUNT(kConvertFtoI)
-            VECTOR_PREPROCESS_NO_COUNT(kConvertStoF)
-            VECTOR_PREPROCESS_NO_COUNT(kConvertUtoF)
-            VECTOR_PREPROCESS(kCos)
-            VECTOR_MATRIX_PREPROCESS(kDivideF)
-            VECTOR_PREPROCESS(kDivideS)
-            VECTOR_PREPROCESS(kDivideU)
-            VECTOR_MATRIX_PREPROCESS(kDup)
-
-            case ByteCodeInstruction::kInverse2x2:
-            case ByteCodeInstruction::kInverse3x3:
-            case ByteCodeInstruction::kInverse4x4: break;
-
-            case ByteCodeInstruction::kLoad:
-            case ByteCodeInstruction::kLoad2:
-            case ByteCodeInstruction::kLoad3:
-            case ByteCodeInstruction::kLoad4:
-            case ByteCodeInstruction::kLoadGlobal:
-            case ByteCodeInstruction::kLoadGlobal2:
-            case ByteCodeInstruction::kLoadGlobal3:
-            case ByteCodeInstruction::kLoadGlobal4:
-            case ByteCodeInstruction::kLoadUniform:
-            case ByteCodeInstruction::kLoadUniform2:
-            case ByteCodeInstruction::kLoadUniform3:
-            case ByteCodeInstruction::kLoadUniform4: READ16(); break;
-
-            case ByteCodeInstruction::kLoadSwizzle:
-            case ByteCodeInstruction::kLoadSwizzleGlobal:
-            case ByteCodeInstruction::kLoadSwizzleUniform: {
-                READ8();
-                int count = READ8();
-                ip += count;
-                break;
-            }
-
-            case ByteCodeInstruction::kLoadExtended:
-            case ByteCodeInstruction::kLoadExtendedGlobal:
-            case ByteCodeInstruction::kLoadExtendedUniform:
-                READ8();
-                break;
-
-            case ByteCodeInstruction::kMatrixToMatrix: {
-                READ8();
-                READ8();
-                READ8();
-                READ8();
-                break;
-            }
-            case ByteCodeInstruction::kMatrixMultiply: {
-                READ8();
-                READ8();
-                READ8();
-                break;
-            }
-            VECTOR_MATRIX_PREPROCESS(kMultiplyF)
-            VECTOR_PREPROCESS(kMultiplyI)
-            VECTOR_MATRIX_PREPROCESS_NO_COUNT(kNegateF)
-            VECTOR_PREPROCESS_NO_COUNT(kNegateI)
-            case ByteCodeInstruction::kNotB: break;
-            case ByteCodeInstruction::kOrB: break;
-            VECTOR_MATRIX_PREPROCESS_NO_COUNT(kPop)
-            case ByteCodeInstruction::kPushImmediate: READ32(); break;
-
-            case ByteCodeInstruction::kReadExternal:
-            case ByteCodeInstruction::kReadExternal2:
-            case ByteCodeInstruction::kReadExternal3:
-            case ByteCodeInstruction::kReadExternal4: READ16(); break;
-
-            VECTOR_PREPROCESS(kRemainderF)
-            VECTOR_PREPROCESS(kRemainderS)
-            VECTOR_PREPROCESS(kRemainderU)
-            case ByteCodeInstruction::kReserve: READ8(); break;
-            case ByteCodeInstruction::kReturn: READ8(); break;
-            case ByteCodeInstruction::kScalarToMatrix: READ8(); READ8(); break;
-            case ByteCodeInstruction::kShiftLeft: READ8(); break;
-            case ByteCodeInstruction::kShiftRightS: READ8(); break;
-            case ByteCodeInstruction::kShiftRightU: READ8(); break;
-            VECTOR_PREPROCESS(kSin)
-            VECTOR_PREPROCESS_NO_COUNT(kSqrt)
-
-            case ByteCodeInstruction::kStore:
-            case ByteCodeInstruction::kStore2:
-            case ByteCodeInstruction::kStore3:
-            case ByteCodeInstruction::kStore4:
-            case ByteCodeInstruction::kStoreGlobal:
-            case ByteCodeInstruction::kStoreGlobal2:
-            case ByteCodeInstruction::kStoreGlobal3:
-            case ByteCodeInstruction::kStoreGlobal4: READ8(); break;
-
-            case ByteCodeInstruction::kStoreSwizzle:
-            case ByteCodeInstruction::kStoreSwizzleGlobal: {
-                READ8();
-                int count = READ8();
-                ip += count;
-                break;
-            }
-
-            case ByteCodeInstruction::kStoreSwizzleIndirect:
-            case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
-                int count = READ8();
-                ip += count;
-                break;
-            }
-
-            case ByteCodeInstruction::kStoreExtended: READ8(); break;
-            case ByteCodeInstruction::kStoreExtendedGlobal: READ8(); break;
-
-            VECTOR_MATRIX_PREPROCESS(kSubtractF)
-            VECTOR_PREPROCESS(kSubtractI)
-
-            case ByteCodeInstruction::kSwizzle: {
-                READ8();
-                int count = READ8();
-                ip += count;
-                break;
-            }
-            VECTOR_PREPROCESS(kTan)
-            case ByteCodeInstruction::kWriteExternal:
-            case ByteCodeInstruction::kWriteExternal2:
-            case ByteCodeInstruction::kWriteExternal3:
-            case ByteCodeInstruction::kWriteExternal4: READ16(); break;
-
-            case ByteCodeInstruction::kXorB: break;
-            case ByteCodeInstruction::kMaskPush: break;
-            case ByteCodeInstruction::kMaskPop: break;
-            case ByteCodeInstruction::kMaskNegate: break;
-            case ByteCodeInstruction::kMaskBlend: READ8(); break;
-            case ByteCodeInstruction::kBranchIfAllFalse: READ16(); break;
-            case ByteCodeInstruction::kLoopBegin: break;
-            case ByteCodeInstruction::kLoopNext: break;
-            case ByteCodeInstruction::kLoopMask: break;
-            case ByteCodeInstruction::kLoopEnd: break;
-            case ByteCodeInstruction::kLoopContinue:  break;
-            case ByteCodeInstruction::kLoopBreak: break;
-            default:
-                ip -= 2;
-                printf("unknown(%d)\n", READ16());
-                SkASSERT(false);
-        }
-    }
-#endif
-}
-
 bool ByteCode::run(const ByteCodeFunction* f,
                    float* args, int argCount,
                    float* outReturn, int returnCount,