Interpreter: Change all Load/Store ops to use immediate indices

Avoids the need for a PushImmediate (which usually also required
a Nop for alignment) on every load/store. And it makes the codegen
for local vs. global identical, which simplifies the byte-code
generator in a few spots.

This means that places previously using DupDown now need something that
dupes the *top* N elements of the stack. Just use Vector + Dup, and
remove the (now unused) DupDown instruction.

Also add/implement kStoreSwizzleGlobal, which was the last missing
load/store op (and add a test case).

Change-Id: Id450272c11f4f1842c9d57bc5114e7f81e9e926e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/214062
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index ed5ad39..74464c2 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -109,6 +109,7 @@
         case Variable::kLocal_Storage: {
             for (int i = fLocals.size() - 1; i >= 0; --i) {
                 if (fLocals[i] == &var) {
+                    SkASSERT(fParameterCount + i <= 255);
                     return fParameterCount + i;
                 }
             }
@@ -117,18 +118,20 @@
             for (int i = 0; i < slot_count(var.fType) - 1; ++i) {
                 fLocals.push_back(nullptr);
             }
+            SkASSERT(result <= 255);
             return result;
         }
         case Variable::kParameter_Storage: {
             int offset = 0;
             for (const auto& p : fFunction->fDeclaration.fParameters) {
                 if (p == &var) {
+                    SkASSERT(offset <= 255);
                     return offset;
                 }
                 offset += slot_count(p->fType);
             }
             SkASSERT(false);
-            return -1;
+            return 0;
         }
         case Variable::kGlobal_Storage: {
             int offset = 0;
@@ -149,7 +152,7 @@
                 }
             }
             SkASSERT(false);
-            return -1;
+            return 0;
         }
         default:
             SkASSERT(false);
@@ -448,15 +451,10 @@
     switch (s.fBase->fKind) {
         case Expression::kVariableReference_Kind: {
             const Variable& var = ((VariableReference&) *s.fBase).fVariable;
-            if (var.fStorage == Variable::kGlobal_Storage) {
-                this->write(ByteCodeInstruction::kLoadSwizzleGlobal);
-                this->write8(this->getLocation(var));
-            } else {
-                this->align(4, 3);
-                this->write(ByteCodeInstruction::kPushImmediate);
-                this->write32(this->getLocation(var));
-                this->write(ByteCodeInstruction::kLoadSwizzle);
-            }
+            this->write(var.fStorage == Variable::kGlobal_Storage
+                            ? ByteCodeInstruction::kLoadSwizzleGlobal
+                            : ByteCodeInstruction::kLoadSwizzle);
+            this->write8(this->getLocation(var));
             this->write8(s.fComponents.size());
             for (int c : s.fComponents) {
                 this->write8(c);
@@ -475,25 +473,15 @@
 }
 
 void ByteCodeGenerator::writeVariableReference(const VariableReference& v) {
-    if (v.fVariable.fStorage == Variable::kGlobal_Storage) {
-        int count = slot_count(v.fType);
-        if (count > 1) {
-            this->write(ByteCodeInstruction::kVector);
-            this->write8(count);
-        }
-        this->write(ByteCodeInstruction::kLoadGlobal);
-        this->write8(this->getLocation(v.fVariable));
-    } else {
-        this->align(4, 3);
-        this->write(ByteCodeInstruction::kPushImmediate);
-        this->write32(this->getLocation(v.fVariable));
-        int count = slot_count(v.fType);
-        if (count > 1) {
-            this->write(ByteCodeInstruction::kVector);
-            this->write8(count);
-        }
-        this->write(ByteCodeInstruction::kLoad);
+    int count = slot_count(v.fType);
+    if (count > 1) {
+        this->write(ByteCodeInstruction::kVector);
+        this->write8(count);
     }
+    this->write(v.fVariable.fStorage == Variable::kGlobal_Storage
+                    ? ByteCodeInstruction::kLoadGlobal
+                    : ByteCodeInstruction::kLoad);
+    this->write8(this->getLocation(v.fVariable));
 }
 
 void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) {
@@ -554,21 +542,6 @@
     }
 }
 
-void ByteCodeGenerator::writeTarget(const Expression& e) {
-    switch (e.fKind) {
-        case Expression::kVariableReference_Kind:
-            this->align(4, 3);
-            this->write(ByteCodeInstruction::kPushImmediate);
-            this->write32(this->getLocation(((VariableReference&) e).fVariable));
-            break;
-        case Expression::kIndex_Kind:
-        case Expression::kTernary_Kind:
-        default:
-            printf("unsupported target %s\n", e.description().c_str());
-            SkASSERT(false);
-    }
-}
-
 class ByteCodeExternalValueLValue : public ByteCodeGenerator::LValue {
 public:
     ByteCodeExternalValueLValue(ByteCodeGenerator* generator, ExternalValue& value, int index)
@@ -612,24 +585,25 @@
     ByteCodeSwizzleLValue(ByteCodeGenerator* generator, const Swizzle& swizzle)
         : INHERITED(*generator)
         , fSwizzle(swizzle) {
-        fGenerator.writeTarget(*swizzle.fBase);
+        SkASSERT(fSwizzle.fBase->fKind == Expression::kVariableReference_Kind);
     }
 
     void load() override {
-        fGenerator.write(ByteCodeInstruction::kDup);
-        fGenerator.write(ByteCodeInstruction::kLoadSwizzle);
-        fGenerator.write8(fSwizzle.fComponents.size());
-        for (int c : fSwizzle.fComponents) {
-            fGenerator.write8(c);
-        }
+        fGenerator.writeSwizzle(fSwizzle);
     }
 
     void store() override {
-        size_t count = fSwizzle.fComponents.size();
-        fGenerator.write(ByteCodeInstruction::kDupDown);
-        fGenerator.write8(count);
-        fGenerator.write(ByteCodeInstruction::kStoreSwizzle);
-        fGenerator.write8(count);
+        const Variable& var = ((VariableReference&)*fSwizzle.fBase).fVariable;
+        if (fSwizzle.fComponents.size() > 1) {
+            fGenerator.write(ByteCodeInstruction::kVector);
+            fGenerator.write8(fSwizzle.fComponents.size());
+        }
+        fGenerator.write(ByteCodeInstruction::kDup);
+        fGenerator.write(var.fStorage == Variable::kGlobal_Storage
+                            ? ByteCodeInstruction::kStoreSwizzleGlobal
+                            : ByteCodeInstruction::kStoreSwizzle);
+        fGenerator.write8(fGenerator.getLocation(var));
+        fGenerator.write8(fSwizzle.fComponents.size());
         for (int c : fSwizzle.fComponents) {
             fGenerator.write8(c);
         }
@@ -646,37 +620,40 @@
     ByteCodeVariableLValue(ByteCodeGenerator* generator, const Variable& var)
         : INHERITED(*generator)
         , fCount(slot_count(var.fType))
+        , fLocation(generator->getLocation(var))
         , fIsGlobal(var.fStorage == Variable::kGlobal_Storage) {
-        fGenerator.align(4, 3);
-        fGenerator.write(ByteCodeInstruction::kPushImmediate);
-        fGenerator.write32(generator->getLocation(var));
     }
 
     void load() override {
+        if (fCount > 1) {
+            fGenerator.write(ByteCodeInstruction::kVector);
+            fGenerator.write8(fCount);
+        }
+        fGenerator.write(fIsGlobal ? ByteCodeInstruction::kLoadGlobal
+                                   : ByteCodeInstruction::kLoad);
+        fGenerator.write8(fLocation);
+    }
+
+    void store() override {
+        if (fCount > 1) {
+            fGenerator.write(ByteCodeInstruction::kVector);
+            fGenerator.write8(fCount);
+        }
         fGenerator.write(ByteCodeInstruction::kDup);
         if (fCount > 1) {
             fGenerator.write(ByteCodeInstruction::kVector);
             fGenerator.write8(fCount);
         }
-        fGenerator.write(fIsGlobal ? ByteCodeInstruction::kLoadGlobal : ByteCodeInstruction::kLoad);
-    }
-
-    void store() override {
-        fGenerator.write(ByteCodeInstruction::kDupDown);
-        fGenerator.write8(fCount);
-        if (fCount > 1) {
-            fGenerator.write(ByteCodeInstruction::kVector);
-            fGenerator.write8(fCount);
-        }
         fGenerator.write(fIsGlobal ? ByteCodeInstruction::kStoreGlobal
-                                  : ByteCodeInstruction::kStore);
+                                   : ByteCodeInstruction::kStore);
+        fGenerator.write8(fLocation);
     }
 
 private:
     typedef LValue INHERITED;
 
     int fCount;
-
+    int fLocation;
     bool fIsGlobal;
 };
 
@@ -825,9 +802,6 @@
         // has been allocated
         int location = getLocation(*decl.fVar);
         if (decl.fValue) {
-            this->align(4, 3);
-            this->write(ByteCodeInstruction::kPushImmediate);
-            this->write32(location);
             this->writeExpression(*decl.fValue);
             int count = slot_count(decl.fValue->fType);
             if (count > 1) {
@@ -835,6 +809,7 @@
                 this->write8(count);
             }
             this->write(ByteCodeInstruction::kStore);
+            this->write8(location);
         }
     }
 }