Revert "Revert "switched SkSL interpreter over to threaded code""

This reverts commit cbdc829ff126b31437d6fae5e860d182ec14a963.

Bug: skia:
Change-Id: Idb7ad413431e6556bc040369df258655c2349ef2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239447
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index 817847d..6977ea4 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -441,6 +441,7 @@
                     this->write(ByteCodeInstruction::kPushImmediate);
                     this->write32(offset);
                     this->write(ByteCodeInstruction::kAddI);
+                    this->write8(1);
                 }
                 return -1;
             } else {
@@ -475,6 +476,7 @@
                     this->write(ByteCodeInstruction::kPushImmediate);
                     this->write32(stride);
                     this->write(ByteCodeInstruction::kMultiplyI);
+                    this->write8(1);
                 }
             }
             int baseAddr = this->getLocation(*i.fBase, storage);
@@ -501,6 +503,7 @@
                 this->write32(offset);
             }
             this->write(ByteCodeInstruction::kAddI);
+            this->write8(1);
             return -1;
         }
         case Expression::kSwizzle_Kind: {
@@ -513,6 +516,7 @@
                     this->write(ByteCodeInstruction::kPushImmediate);
                     this->write32(offset);
                     this->write(ByteCodeInstruction::kAddI);
+                    this->write8(1);
                 }
                 return -1;
             } else {
@@ -556,19 +560,22 @@
         case ByteCodeInstruction::kMaskBlend: this->exitCondition();  break;
         default: /* Do nothing */ break;
     }
-    this->write16((uint16_t)i);
+    instruction val = (instruction) i;
+    size_t n = fCode->size();
+    fCode->resize(n + sizeof(val));
+    memcpy(fCode->data() + n, &val, sizeof(val));
     fStackCount += StackUsage(i, count);
     fMaxStackCount = std::max(fMaxStackCount, fStackCount);
 }
 
 static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
     SkASSERT(count >= 1 && count <= 4);
-    return ((ByteCodeInstruction) ((int) base + count - 1));
+    return ((ByteCodeInstruction) ((int) base + 1 - count));
 }
 
 void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruction s,
                                               ByteCodeInstruction u, ByteCodeInstruction f,
-                                              int count) {
+                                              int count, bool writeCount) {
     switch (type_category(type)) {
         case TypeCategory::kSigned:
             this->write(vector_instruction(s, count));
@@ -578,8 +585,7 @@
             break;
         case TypeCategory::kFloat: {
             if (count > 4) {
-                this->write((ByteCodeInstruction)((int)f + 4), count);
-                this->write8(count);
+                this->write((ByteCodeInstruction)((int)f + 1), count);
             } else {
                 this->write(vector_instruction(f, count));
             }
@@ -588,6 +594,9 @@
         default:
             SkASSERT(false);
     }
+    if (writeCount) {
+        this->write8(count);
+    }
 }
 
 bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool discard) {
@@ -614,6 +623,7 @@
         if (!lVecOrMtx && rVecOrMtx) {
             for (int i = SlotCount(rType); i > 1; --i) {
                 this->write(ByteCodeInstruction::kDup);
+                this->write8(1);
             }
         }
     }
@@ -621,6 +631,7 @@
     if (lVecOrMtx && !rVecOrMtx) {
         for (int i = SlotCount(lType); i > 1; --i) {
             this->write(ByteCodeInstruction::kDup);
+            this->write8(1);
         }
     }
     // Special case for M*V, V*M, M*M (but not V*V!)
@@ -799,6 +810,7 @@
                 SkASSERT(outType.kind() == Type::kVector_Kind);
                 for (; inCount != outCount; ++inCount) {
                     this->write(ByteCodeInstruction::kDup);
+                    this->write8(1);
                 }
             }
         }
@@ -822,8 +834,9 @@
 }
 
 void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e) {
-    this->write(vector_instruction(ByteCodeInstruction::kReadExternal,
-                                   SlotCount(e.fValue->type())));
+    int count = SlotCount(e.fValue->type());
+    this->write(vector_instruction(ByteCodeInstruction::kReadExternal, count));
+    this->write8(count);
     int index = fOutput->fExternalValues.size();
     fOutput->fExternalValues.push_back(e.fValue);
     SkASSERT(index <= 255);
@@ -848,6 +861,7 @@
         this->write(vector_instruction(isGlobal ? ByteCodeInstruction::kLoadGlobal
                                                 : ByteCodeInstruction::kLoad,
                                        count));
+        this->write8(count);
         this->write8(location);
     }
 }
@@ -876,9 +890,11 @@
             case SpecialIntrinsic::kDot: {
                 SkASSERT(c.fArguments.size() == 2);
                 SkASSERT(count == SlotCount(c.fArguments[1]->fType));
-                this->write((ByteCodeInstruction)((int)ByteCodeInstruction::kMultiplyF + count-1));
+                this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count));
+                this->write8(count);
                 for (int i = count; i > 1; --i) {
                     this->write(ByteCodeInstruction::kAddF);
+                    this->write8(1);
                 }
                 break;
             }
@@ -889,11 +905,14 @@
         switch (found->second.fValue.fInstruction) {
             case ByteCodeInstruction::kCos:
             case ByteCodeInstruction::kSin:
-            case ByteCodeInstruction::kSqrt:
             case ByteCodeInstruction::kTan:
                 SkASSERT(c.fArguments.size() > 0);
-                this->write((ByteCodeInstruction) ((int) found->second.fValue.fInstruction +
-                            count - 1));
+                this->write(vector_instruction(found->second.fValue.fInstruction, count));
+                this->write8(count);
+                break;
+            case ByteCodeInstruction::kSqrt:
+                SkASSERT(c.fArguments.size() > 0);
+                this->write(vector_instruction(found->second.fValue.fInstruction, count));
                 break;
             case ByteCodeInstruction::kInverse2x2: {
                 SkASSERT(c.fArguments.size() > 0);
@@ -1039,7 +1058,8 @@
                                         ByteCodeInstruction::kNegateI,
                                         ByteCodeInstruction::kNegateI,
                                         ByteCodeInstruction::kNegateF,
-                                        SlotCount(p.fOperand->fType));
+                                        SlotCount(p.fOperand->fType),
+                                        false);
             break;
         }
         default:
@@ -1058,6 +1078,7 @@
             // If we're not supposed to discard the result, then make a copy *before* the +/-
             if (!discard) {
                 this->write(ByteCodeInstruction::kDup);
+                this->write8(1);
             }
             this->write(ByteCodeInstruction::kPushImmediate);
             this->write32(type_category(p.fType) == TypeCategory::kFloat ? float_to_bits(1.0f) : 1);
@@ -1202,14 +1223,17 @@
 
     void load() override {
         fGenerator.write(vector_instruction(ByteCodeInstruction::kReadExternal, fCount));
+        fGenerator.write8(fCount);
         fGenerator.write8(fIndex);
     }
 
     void store(bool discard) override {
         if (!discard) {
             fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, fCount));
+            fGenerator.write8(fCount);
         }
         fGenerator.write(vector_instruction(ByteCodeInstruction::kWriteExternal, fCount));
+        fGenerator.write8(fCount);
         fGenerator.write8(fIndex);
     }
 
@@ -1235,6 +1259,7 @@
         int count = fSwizzle.fComponents.size();
         if (!discard) {
             fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
+            fGenerator.write8(count);
         }
         Variable::Storage storage = Variable::kLocal_Storage;
         int location = fGenerator.getLocation(*fSwizzle.fBase, &storage);
@@ -1279,6 +1304,7 @@
                 fGenerator.write8(count);
             } else {
                 fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
+                fGenerator.write8(count);
             }
         }
         Variable::Storage storage = Variable::kLocal_Storage;