Reland "Reland "Interpreter: Support striped inputs for less overhead""
This reverts commit 2c59b4e9ea856231e6c75608b66f202d16201679.
Change-Id: I2b06936994430722b8fc3890ff9b4a6f4710db04
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221998
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp
index 8b9fc3e..23730c0 100644
--- a/src/sksl/SkSLByteCode.cpp
+++ b/src/sksl/SkSLByteCode.cpp
@@ -1041,6 +1041,57 @@
}
}
+void ByteCode::runStriped(const ByteCodeFunction* f, float* args[], int nargs, int N,
+ const float* uniforms, int uniformCount) const {  // Runs f over N lanes, at most VecWidth per innerRun call
+#ifdef TRACE
+ disassemble(f);
+#endif
+ Interpreter::VValue stack[128];  // slots 0..nargs-1 are reloaded from args for every chunk
+ // args[i] supplies the floats for stack slot i; out-parameter slots are written back through the same pointers
+ // Lane indices 0..15; must cover at least VecWidth lanes (this count is unrelated to the parameter N)
+ static const Interpreter::I32 gLanes = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ };
+ // Preconditions (SkASSERT only): f returns nothing, nargs matches fParameterCount, one uniform per input slot
+ SkASSERT(f->fReturnCount == 0);
+ SkASSERT(nargs == f->fParameterCount);
+ SkASSERT(uniformCount == (int)fInputSlots.size());
+ Interpreter::VValue globals[32];
+ SkASSERT((int)SK_ARRAY_COUNT(globals) >= fGlobalCount);
+ for (uint8_t slot : fInputSlots) {  // uniforms are consumed in fInputSlots order
+ globals[slot].fFloat = *uniforms++;
+ }
+ // NOTE(review): assumes N >= 0 - a negative N makes w negative and hands memcpy a huge size_t; confirm callers
+ while (N) {
+ int w = std::min(N, Interpreter::VecWidth);  // lanes handled this pass; the last pass may be partial
+
+ // Load w floats per argument: args[i] fills stack slot i
+ for (int i = 0; i < nargs; ++i) {
+ memcpy(stack + i, args[i], w * sizeof(float));
+ }
+ // Compare w against lane indices; presumably enables only the first w lanes - confirm against I32's operator>
+ auto mask = w > gLanes;
+ innerRun(this, f, stack, nullptr, mask, globals);
+
+ // Write results back for out parameters only; 'slot' tracks each parameter's first stack slot
+ int slot = 0;
+ for (const auto& p : f->fParameters) {
+ if (p.fIsOutParameter) {
+ for (int i = slot; i < slot + p.fSlotCount; ++i) {
+ memcpy(args[i], stack + i, w * sizeof(float));
+ }
+ }
+ slot += p.fSlotCount;
+ }
+
+ // Advance every argument pointer past the lanes just processed (this modifies the caller's args array)
+ for (int i = 0; i < nargs; ++i) {
+ args[i] += w;
+ }
+ N -= w;
+ }
+}
+
} // namespace SkSL
#endif