Fixed integer divide / remainder in sksl interpreter

Our integer tests were broken: constant folding was optimizing away
the actual operations, so nothing was actually being tested. As a
result, we never noticed that integer divide / remainder didn't
work: vector division signals if any of the divisor's lanes are zero,
and zeroes are common in masked-off lanes.

This replaces naive vector operations with loops and mask checks
for integer divide and remainder, and corrects the various broken
integer tests so that they actually exercise the operations.

Change-Id: I6ffcad9e7b38a0bfd1604097f86d0faa24e1dbc7
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/243698
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp
index d4cc047..674cf35 100644
--- a/src/sksl/SkSLByteCode.cpp
+++ b/src/sksl/SkSLByteCode.cpp
@@ -288,6 +288,45 @@
         NEXT();                                       \
     }
 
+// Applies 'op' (integer / or %) one lane at a time, skipping lanes whose mask() entry is zero. A
+// naive skvx vector operation would likely crash with a divide by zero in those inactive lanes.
+#define VECTOR_BINARY_MASKED_OP(base, field, op)            \
+    LABEL(base ## 4)                                        \
+        for (int i = 0; i < VecWidth; ++i) {                \
+            if (mask()[i]) {                                \
+                sp[-4].field[i] op ## = sp[0].field[i];     \
+            }                                               \
+        }                                                   \
+        POP();                                              \
+        /* fall through */                                  \
+    LABEL(base ## 3) {                                      \
+        for (int i = 0; i < VecWidth; ++i) {                \
+            if (mask()[i]) {                                \
+                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
+            }                                               \
+        }                                                   \
+        POP();                                              \
+    }   /* fall through */                                  \
+    LABEL(base ## 2) {                                      \
+        for (int i = 0; i < VecWidth; ++i) {                \
+            if (mask()[i]) {                                \
+                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
+            }                                               \
+        }                                                   \
+        POP();                                              \
+    }   /* fall through */                                  \
+    LABEL(base) {                                           \
+        for (int i = 0; i < VecWidth; ++i) {                \
+            if (mask()[i]) {                                \
+                sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
+            }                                               \
+        }                                                   \
+        POP();                                              \
+        ++ip;                                               \
+        NEXT();                                             \
+    }
+
+
 #define VECTOR_MATRIX_BINARY_OP(base, field, op)          \
     VECTOR_BINARY_OP(base, field, op)                     \
     LABEL(base ## N) {                                    \
@@ -391,7 +430,6 @@
     int fParameterCount;
 };
 
-// TODO: trunc on integers?
 template <typename T>
 static T vec_mod(T a, T b) {
     return a - skvx::trunc(a / b) * b;
@@ -839,8 +877,8 @@
 
     VECTOR_UNARY_FN_VEC(kCos, cosf)
 
-    VECTOR_BINARY_OP(kDivideS, fSigned, /)
-    VECTOR_BINARY_OP(kDivideU, fUnsigned, /)
+    VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
+    VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
     VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
 
     LABEL(kDup4) PUSH(sp[1 - ip[0]]);
@@ -1040,8 +1078,8 @@
     }
 
     VECTOR_BINARY_FN(kRemainderF, fFloat, vec_mod<F32>)
-    VECTOR_BINARY_FN(kRemainderS, fSigned, vec_mod<I32>)
-    VECTOR_BINARY_FN(kRemainderU, fUnsigned, vec_mod<U32>)
+    VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
+    VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
 
     LABEL(kReserve)
         sp += READ8();