Binary elementwise microkernels
- ADD/MUL/SUB microkernel templates in scalar, PSIMD, SSE, NEON implementations
- ADDC/MULC/SUBC/RSUBC microkernel templates in scalar, PSIMD, SSE, NEON implementations
- Unit test generators
- Remove legacy (non-generated) microkernel implementations
PiperOrigin-RevId: 280528154
diff --git a/src/f32-binop/vop-scalar.c.in b/src/f32-binop/vop-scalar.c.in
new file mode 100644
index 0000000..ab6cfba
--- /dev/null
+++ b/src/f32-binop/vop-scalar.c.in
@@ -0,0 +1,85 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert BATCH_TILE >= 1
+$ABC = "0123456789ABCDEFGHIJKLMN"
+$assert OP in ["ADD", "MUL", "SUB"]
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vbinop.h>
+
+
+$OP_FUNC = {
+$ "ADD": lambda x, y: "%s + %s" % (x, y),
+$ "MUL": lambda x, y: "%s * %s" % (x, y),
+$ "SUB": lambda x, y: "%s - %s" % (x, y),
+$}[OP]
+void xnn_f32_v${OP.lower()}_ukernel__scalar_x${BATCH_TILE}(  // Element-wise y[i] = a[i] OP b[i], then clamped to the params min/max.
+ size_t n,  // Size of each array in BYTES (not elements); see the asserts below.
+ const float* a,  // First input operand array (read only).
+ const float* b,  // Second input operand array (read only).
+ float* y,  // Output array; one float is written per input pair.
+ const union xnn_f32_output_params params[restrict static 1])  // Supplies scalar.min / scalar.max clamp bounds.
+{
+ assert(n != 0);  // Callers must not pass an empty batch.
+ assert(n % sizeof(float) == 0);  // n must cover a whole number of floats.
+
+ const float vy_min = params->scalar.min;  // Lower output bound, loaded once up front.
+ const float vy_max = params->scalar.max;  // Upper output bound, loaded once up front.
+
+ $if BATCH_TILE > 1:
+ for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {  // Main loop: BATCH_TILE floats per iteration.
+ $for N in range(BATCH_TILE):
+ const float va${ABC[N]} = a[${N}];
+ a += ${BATCH_TILE};
+
+ $for N in range(BATCH_TILE):
+ const float vb${ABC[N]} = b[${N}];
+ b += ${BATCH_TILE};
+
+ $for N in range(BATCH_TILE):
+ float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N], "vb" + ABC[N])};
+
+ $for N in range(BATCH_TILE):
+ vy${ABC[N]} = math_max_f32(vy${ABC[N]}, vy_min);
+
+ $for N in range(BATCH_TILE):
+ vy${ABC[N]} = math_min_f32(vy${ABC[N]}, vy_max);
+
+ $for N in range(BATCH_TILE):
+ y[${N}] = vy${ABC[N]};
+ y += ${BATCH_TILE};
+ }
+ if XNN_UNLIKELY(n != 0) {  // Remainder: fewer than BATCH_TILE floats are left after the main loop.
+ $if BATCH_TILE > 2:
+ do {  // Several floats may remain, so process them one at a time.
+ const float va = *a++;
+ const float vb = *b++;
+ float vy = ${OP_FUNC("va", "vb")};  // OP_FUNC emits "va + vb", "va * vb" or "va - vb" depending on OP.
+ vy = math_max_f32(vy, vy_min);  // Lower clamp (assumes math_max_f32 returns the larger argument).
+ vy = math_min_f32(vy, vy_max);  // Upper clamp (assumes math_min_f32 returns the smaller argument).
+ *y++ = vy;
+ n -= sizeof(float);  // n counts bytes, so step by one float's size.
+ } while (n != 0);
+ $else:
+ const float va = *a;  // BATCH_TILE == 2 branch: given the asserts above, exactly one float remains, so no loop.
+ const float vb = *b;
+ float vy = ${OP_FUNC("va", "vb")};
+ vy = math_max_f32(vy, vy_min);
+ vy = math_min_f32(vy, vy_max);
+ *y = vy;  // Pointers are not advanced; nothing is read or written after this.
+ }
+ $else:
+ for (; n >= sizeof(float); n -= sizeof(float)) {  // BATCH_TILE == 1: plain one-float-per-iteration loop, no remainder path.
+ const float va = *a++;
+ const float vb = *b++;
+ float vy = ${OP_FUNC("va", "vb")};  // Expression text is chosen by OP when the template is expanded.
+ vy = math_max_f32(vy, vy_min);  // Lower clamp (assumes math_max_f32 returns the larger argument).
+ vy = math_min_f32(vy, vy_max);  // Upper clamp (assumes math_min_f32 returns the smaller argument).
+ *y++ = vy;
+ }
+}