Interpreter: Vectorized interpreter

This converts the SkSL interpreter to operate in SIMT fashion. It handles
all the same features as the previous scalar implementation, but operates
on N lanes at a time (currently 8).

It's modeled after GPUs and other parallel architectures, using execution
masks to handle control flow, including divergent control flow.

Change-Id: Ieb38ffe2f55a10f72bdab844c297126fe9bedb6c
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/217122
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp
index 56a3384..13ffc9e 100644
--- a/src/sksl/SkSLByteCodeGenerator.cpp
+++ b/src/sksl/SkSLByteCodeGenerator.cpp
@@ -459,6 +459,24 @@
                                             ByteCodeInstruction::kMultiplyF,
                                             count);
                 break;
+
+            case Token::Kind::LOGICALAND:
+                SkASSERT(type_category(lType) == SkSL::TypeCategory::kBool && count == 1);
+                this->write(ByteCodeInstruction::kAndB);
+                break;
+            case Token::Kind::LOGICALNOT:
+                SkASSERT(type_category(lType) == SkSL::TypeCategory::kBool && count == 1);
+                this->write(ByteCodeInstruction::kNotB);
+                break;
+            case Token::Kind::LOGICALOR:
+                SkASSERT(type_category(lType) == SkSL::TypeCategory::kBool && count == 1);
+                this->write(ByteCodeInstruction::kOrB);
+                break;
+            case Token::Kind::LOGICALXOR:
+                SkASSERT(type_category(lType) == SkSL::TypeCategory::kBool && count == 1);
+                this->write(ByteCodeInstruction::kXorB);
+                break;
+
             default:
                 SkASSERT(false);
         }
@@ -472,7 +490,7 @@
 
 void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
     this->write(ByteCodeInstruction::kPushImmediate);
-    this->write32(b.fValue ? 1 : 0);
+    this->write32(b.fValue ? ~0 : 0);
 }
 
 void ByteCodeGenerator::writeConstructor(const Constructor& c) {
@@ -744,14 +762,12 @@
 
 void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) {
     this->writeExpression(*t.fTest);
-    this->write(ByteCodeInstruction::kConditionalBranch);
-    DeferredLocation trueLocation(this);
-    this->writeExpression(*t.fIfFalse);
-    this->write(ByteCodeInstruction::kBranch);
-    DeferredLocation endLocation(this);
-    trueLocation.set();
+    this->write(ByteCodeInstruction::kMaskPush);
     this->writeExpression(*t.fIfTrue);
-    endLocation.set();
+    this->write(ByteCodeInstruction::kMaskNegate);
+    this->writeExpression(*t.fIfFalse);
+    this->write(ByteCodeInstruction::kMaskBlend);
+    this->write8(SlotCount(t.fType));
 }
 
 void ByteCodeGenerator::writeExpression(const Expression& e, bool discard) {
@@ -976,25 +992,29 @@
 }
 
 void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
-    this->write(ByteCodeInstruction::kBranch);
-    fBreakTargets.top().emplace_back(this);
+    // TODO: Include BranchIfAllFalse to top-most LoopNext
+    this->write(ByteCodeInstruction::kLoopBreak);
 }
 
 void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
-    this->write(ByteCodeInstruction::kBranch);
-    fContinueTargets.top().emplace_back(this);
+    // TODO: Include BranchIfAllFalse to top-most LoopNext
+    this->write(ByteCodeInstruction::kLoopContinue);
 }
 
 void ByteCodeGenerator::writeDoStatement(const DoStatement& d) {
-    fContinueTargets.emplace();
-    fBreakTargets.emplace();
+    this->write(ByteCodeInstruction::kLoopBegin);
     size_t start = fCode->size();
     this->writeStatement(*d.fStatement);
-    this->setContinueTargets();
+    this->write(ByteCodeInstruction::kLoopNext);
     this->writeExpression(*d.fTest);
-    this->write(ByteCodeInstruction::kConditionalBranch);
+    this->write(ByteCodeInstruction::kLoopMask);
+    // TODO: Could shorten this with kBranchIfAnyTrue
+    this->write(ByteCodeInstruction::kBranchIfAllFalse);
+    DeferredLocation endLocation(this);
+    this->write(ByteCodeInstruction::kBranch);
     this->write16(start);
-    this->setBreakTargets();
+    endLocation.set();
+    this->write(ByteCodeInstruction::kLoopEnd);
 }
 
 void ByteCodeGenerator::writeForStatement(const ForStatement& f) {
@@ -1003,53 +1023,40 @@
     if (f.fInitializer) {
         this->writeStatement(*f.fInitializer);
     }
+    this->write(ByteCodeInstruction::kLoopBegin);
     size_t start = fCode->size();
     if (f.fTest) {
         this->writeExpression(*f.fTest);
-        this->write(ByteCodeInstruction::kNot);
-        this->write(ByteCodeInstruction::kConditionalBranch);
-        DeferredLocation endLocation(this);
-        this->writeStatement(*f.fStatement);
-        this->setContinueTargets();
-        if (f.fNext) {
-            this->writeExpression(*f.fNext, true);
-        }
-        this->write(ByteCodeInstruction::kBranch);
-        this->write16(start);
-        endLocation.set();
-    } else {
-        this->writeStatement(*f.fStatement);
-        this->setContinueTargets();
-        if (f.fNext) {
-            this->writeExpression(*f.fNext, true);
-        }
-        this->write(ByteCodeInstruction::kBranch);
-        this->write16(start);
+        this->write(ByteCodeInstruction::kLoopMask);
     }
-    this->setBreakTargets();
+    this->write(ByteCodeInstruction::kBranchIfAllFalse);
+    DeferredLocation endLocation(this);
+    this->writeStatement(*f.fStatement);
+    this->write(ByteCodeInstruction::kLoopNext);
+    if (f.fNext) {
+        this->writeExpression(*f.fNext, true);
+    }
+    this->write(ByteCodeInstruction::kBranch);
+    this->write16(start);
+    endLocation.set();
+    this->write(ByteCodeInstruction::kLoopEnd);
 }
 
 void ByteCodeGenerator::writeIfStatement(const IfStatement& i) {
+    this->writeExpression(*i.fTest);
+    this->write(ByteCodeInstruction::kMaskPush);
+    this->write(ByteCodeInstruction::kBranchIfAllFalse);
+    DeferredLocation falseLocation(this);
+    this->writeStatement(*i.fIfTrue);
+    falseLocation.set();
     if (i.fIfFalse) {
-        // if (test) { ..ifTrue.. } else { .. ifFalse .. }
-        this->writeExpression(*i.fTest);
-        this->write(ByteCodeInstruction::kConditionalBranch);
-        DeferredLocation trueLocation(this);
+        this->write(ByteCodeInstruction::kMaskNegate);
+        this->write(ByteCodeInstruction::kBranchIfAllFalse);
+        DeferredLocation endLocation(this);
         this->writeStatement(*i.fIfFalse);
-        this->write(ByteCodeInstruction::kBranch);
-        DeferredLocation endLocation(this);
-        trueLocation.set();
-        this->writeStatement(*i.fIfTrue);
-        endLocation.set();
-    } else {
-        // if (test) { ..ifTrue.. }
-        this->writeExpression(*i.fTest);
-        this->write(ByteCodeInstruction::kNot);
-        this->write(ByteCodeInstruction::kConditionalBranch);
-        DeferredLocation endLocation(this);
-        this->writeStatement(*i.fIfTrue);
         endLocation.set();
     }
+    this->write(ByteCodeInstruction::kMaskPop);
 }
 
 void ByteCodeGenerator::writeReturnStatement(const ReturnStatement& r) {
@@ -1086,19 +1093,18 @@
 }
 
 void ByteCodeGenerator::writeWhileStatement(const WhileStatement& w) {
-    fContinueTargets.emplace();
-    fBreakTargets.emplace();
-    size_t start = fCode->size();
+    this->write(ByteCodeInstruction::kLoopBegin);
+    size_t cond = fCode->size();
     this->writeExpression(*w.fTest);
-    this->write(ByteCodeInstruction::kNot);
-    this->write(ByteCodeInstruction::kConditionalBranch);
+    this->write(ByteCodeInstruction::kLoopMask);
+    this->write(ByteCodeInstruction::kBranchIfAllFalse);
     DeferredLocation endLocation(this);
     this->writeStatement(*w.fStatement);
-    this->setContinueTargets();
+    this->write(ByteCodeInstruction::kLoopNext);
     this->write(ByteCodeInstruction::kBranch);
-    this->write16(start);
+    this->write16(cond);
     endLocation.set();
-    this->setBreakTargets();
+    this->write(ByteCodeInstruction::kLoopEnd);
 }
 
 void ByteCodeGenerator::writeStatement(const Statement& s) {