Scalar QS8/QU8 -> F32 VCVT microkernels
PiperOrigin-RevId: 415466058
diff --git a/BUILD.bazel b/BUILD.bazel
index 7bcb674..339df12 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -782,6 +782,10 @@
"src/qs8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c",
"src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c",
"src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c",
+ "src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c",
+ "src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c",
+ "src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c",
+ "src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c",
"src/qs8-gavgpool/gen/7p7x-minmax-scalar-c1.c",
"src/qs8-gavgpool/gen/7p7x-minmax-scalar-c2.c",
"src/qs8-gavgpool/gen/7p7x-minmax-scalar-c4.c",
@@ -869,6 +873,10 @@
"src/qu8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c",
"src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c",
"src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c",
+ "src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c",
+ "src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c",
+ "src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c",
+ "src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c",
"src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
"src/qu8-gavgpool/7x-minmax-scalar-c1.c",
"src/qu8-gemm/gen/1x2-minmax-fp32-scalar-lrint.c",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1153494..dd608b1 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -912,6 +912,10 @@
src/qs8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c
src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c
src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c
+ src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
+ src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
+ src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
+ src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
src/qs8-gavgpool/gen/7p7x-minmax-scalar-c1.c
src/qs8-gavgpool/gen/7p7x-minmax-scalar-c2.c
src/qs8-gavgpool/gen/7p7x-minmax-scalar-c4.c
@@ -999,6 +1003,10 @@
src/qu8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c
src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c
src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c
+ src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
+ src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
+ src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
+ src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
src/qu8-gavgpool/7p7x-minmax-scalar-c1.c
src/qu8-gavgpool/7x-minmax-scalar-c1.c
src/qu8-gemm/gen/1x2-minmax-fp32-scalar-lrint.c
diff --git a/scripts/generate-qs8-f32-vcvt.sh b/scripts/generate-qs8-f32-vcvt.sh
index 3d1cb38..e6e8ffe 100755
--- a/scripts/generate-qs8-f32-vcvt.sh
+++ b/scripts/generate-qs8-f32-vcvt.sh
@@ -47,6 +47,17 @@
tools/xngen src/qs8-f32-vcvt/wasmsimd.c.in -D BATCH_TILE=24 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x24.c &
tools/xngen src/qs8-f32-vcvt/wasmsimd.c.in -D BATCH_TILE=32 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x32.c &
+#################################### Scalar ###################################
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c &
+
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c &
+
################################## Unit tests #################################
tools/generate-vcvt-test.py --spec test/qs8-f32-vcvt.yaml --output test/qs8-f32-vcvt.cc &
tools/generate-vcvt-test.py --spec test/qu8-f32-vcvt.yaml --output test/qu8-f32-vcvt.cc &
diff --git a/src/params-init.c b/src/params-init.c
index 8eaba10..9c77f97 100644
--- a/src/params-init.c
+++ b/src/params-init.c
@@ -2998,6 +2998,15 @@
}
#endif // XNN_ARCH_WASMSIMD
+XNN_INTERNAL void xnn_init_qs8_f32_cvt_scalar_params(  // fills params->scalar for QS8->F32 conversion
+ union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // out: only the .scalar member is written
+ float scale,  // stored unchanged
+ int8_t zero_point)  // signed 8-bit zero point
+{
+ params->scalar.zero_point = (int32_t) zero_point;  // explicitly widened to int32_t before storing
+ params->scalar.scale = scale;
+}
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_INTERNAL void xnn_init_qs8_f32_cvt_neon_params(
union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
@@ -3055,6 +3064,16 @@
}
}
#endif // XNN_ARCH_WASMSIMD
+
+XNN_INTERNAL void xnn_init_qu8_f32_cvt_scalar_params(  // fills params->scalar for QU8->F32 conversion
+ union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // out: only the .scalar member is written
+ float scale,  // stored unchanged
+ uint8_t zero_point)  // unsigned 8-bit zero point
+{
+ params->scalar.zero_point = (int32_t) zero_point;  // explicitly widened to int32_t before storing
+ params->scalar.scale = scale;
+}
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_INTERNAL void xnn_init_qu8_f32_cvt_neon_params(
union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
new file mode 100644
index 0000000..bcc78d2
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
@@ -0,0 +1,41 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x1(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const int8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(int8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ do {  // do-while: assumes n != 0, as asserted above
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(int8_t);  // n counts bytes, not elements
+ } while (n != 0);
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
new file mode 100644
index 0000000..7e55bc1
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x2(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const int8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(int8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 2 * sizeof(int8_t); n -= 2 * sizeof(int8_t)) {  // main loop: 2 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ x += 2;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y += 2;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: the loop exits with n < 2, so exactly one element is left
+ int32_t vx = *x;  // last element: x and y are not advanced
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y = vy;
+ }
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
new file mode 100644
index 0000000..191530c
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
@@ -0,0 +1,66 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x3(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const int8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(int8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 3 * sizeof(int8_t); n -= 3 * sizeof(int8_t)) {  // main loop: 3 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ int32_t vx2 = (int32_t) x[2];
+ x += 3;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+ vx2 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+ float vy2 = (float) vx2;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+ vy2 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y[2] = vy2;
+ y += 3;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: fewer than 3 elements left, converted one at a time
+ do {
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(int8_t);  // n counts bytes, not elements
+ } while (n != 0);
+ }
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
new file mode 100644
index 0000000..99f5b81
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x4(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const int8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(int8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 4 * sizeof(int8_t); n -= 4 * sizeof(int8_t)) {  // main loop: 4 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ int32_t vx2 = (int32_t) x[2];
+ int32_t vx3 = (int32_t) x[3];
+ x += 4;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+ vx2 -= vzero_point;
+ vx3 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+ float vy2 = (float) vx2;
+ float vy3 = (float) vx3;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+ vy2 *= vscale;
+ vy3 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y[2] = vy2;
+ y[3] = vy3;
+ y += 4;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: fewer than 4 elements left, converted one at a time
+ do {
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(int8_t);  // n counts bytes, not elements
+ } while (n != 0);
+ }
+}
diff --git a/src/qs8-f32-vcvt/scalar.c.in b/src/qs8-f32-vcvt/scalar.c.in
new file mode 100644
index 0000000..173ee1f
--- /dev/null
+++ b/src/qs8-f32-vcvt/scalar.c.in
@@ -0,0 +1,81 @@
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert BATCH_TILE >= 1
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE]
+void xnn_${DATATYPE.lower()}_f32_vcvt_ukernel__scalar_x${BATCH_TILE}(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const ${XINT8_T}* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_${DATATYPE.lower()}_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(${XINT8_T}) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ $if BATCH_TILE > 1:
+ for (; n >= ${BATCH_TILE} * sizeof(${XINT8_T}); n -= ${BATCH_TILE} * sizeof(${XINT8_T})) {  // main loop: ${BATCH_TILE} elements per iteration
+ $for N in range(BATCH_TILE):
+ int32_t vx${N} = (int32_t) x[${N}];
+ x += ${BATCH_TILE};
+
+ $for N in range(BATCH_TILE):
+ vx${N} -= vzero_point;
+
+ $for N in range(BATCH_TILE):
+ float vy${N} = (float) vx${N};
+
+ $for N in range(BATCH_TILE):
+ vy${N} *= vscale;
+
+ $for N in range(BATCH_TILE):
+ y[${N}] = vy${N};
+ y += ${BATCH_TILE};
+ }
+ $if BATCH_TILE == 1:
+ do {  // do-while: assumes n != 0, as asserted above
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(${XINT8_T});  // n counts bytes, not elements
+ } while (n != 0);
+ $elif BATCH_TILE == 2:
+ if XNN_UNLIKELY(n != 0) {  // remainder: the loop exits with n < 2, so exactly one element is left
+ int32_t vx = *x;  // last element: x and y are not advanced
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y = vy;
+ }
+ $else:
+ if XNN_UNLIKELY(n != 0) {  // remainder: fewer than ${BATCH_TILE} elements left, converted one at a time
+ do {
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(${XINT8_T});  // n counts bytes, not elements
+ } while (n != 0);
+ }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
new file mode 100644
index 0000000..9923dfd
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
@@ -0,0 +1,41 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x1(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const uint8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(uint8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ do {  // do-while: assumes n != 0, as asserted above
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(uint8_t);  // n counts bytes, not elements
+ } while (n != 0);
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
new file mode 100644
index 0000000..e7d269e
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x2(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const uint8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(uint8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 2 * sizeof(uint8_t); n -= 2 * sizeof(uint8_t)) {  // main loop: 2 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ x += 2;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y += 2;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: the loop exits with n < 2, so exactly one element is left
+ int32_t vx = *x;  // last element: x and y are not advanced
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y = vy;
+ }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
new file mode 100644
index 0000000..2cf908b
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
@@ -0,0 +1,66 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x3(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const uint8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(uint8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 3 * sizeof(uint8_t); n -= 3 * sizeof(uint8_t)) {  // main loop: 3 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ int32_t vx2 = (int32_t) x[2];
+ x += 3;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+ vx2 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+ float vy2 = (float) vx2;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+ vy2 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y[2] = vy2;
+ y += 3;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: fewer than 3 elements left, converted one at a time
+ do {
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(uint8_t);  // n counts bytes, not elements
+ } while (n != 0);
+ }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
new file mode 100644
index 0000000..c665670
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+// Template: src/qs8-f32-vcvt/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x4(  // computes y[i] = (float) (x[i] - zero_point) * scale for every input element
+ size_t n,  // input size in bytes; asserted non-zero below
+ const uint8_t* x,  // input elements, read in order
+ float* y,  // output: one float is stored per input element
+ const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+ assert(n != 0);
+ assert(n % sizeof(uint8_t) == 0);
+ assert(x != NULL);
+ assert(y != NULL);
+
+ const int32_t vzero_point = params->scalar.zero_point;  // subtracted from every input element
+ const float vscale = params->scalar.scale;  // multiplied into every converted value
+
+ for (; n >= 4 * sizeof(uint8_t); n -= 4 * sizeof(uint8_t)) {  // main loop: 4 elements per iteration
+ int32_t vx0 = (int32_t) x[0];
+ int32_t vx1 = (int32_t) x[1];
+ int32_t vx2 = (int32_t) x[2];
+ int32_t vx3 = (int32_t) x[3];
+ x += 4;
+
+ vx0 -= vzero_point;
+ vx1 -= vzero_point;
+ vx2 -= vzero_point;
+ vx3 -= vzero_point;
+
+ float vy0 = (float) vx0;
+ float vy1 = (float) vx1;
+ float vy2 = (float) vx2;
+ float vy3 = (float) vx3;
+
+ vy0 *= vscale;
+ vy1 *= vscale;
+ vy2 *= vscale;
+ vy3 *= vscale;
+
+ y[0] = vy0;
+ y[1] = vy1;
+ y[2] = vy2;
+ y[3] = vy3;
+ y += 4;
+ }
+ if XNN_UNLIKELY(n != 0) {  // remainder: fewer than 4 elements left, converted one at a time
+ do {
+ int32_t vx = *x++;  // widen to int32_t before subtracting the zero point
+ vx -= vzero_point;
+
+ float vy = (float) vx;
+ vy *= vscale;
+ *y++ = vy;
+
+ n -= sizeof(uint8_t);  // n counts bytes, not elements
+ } while (n != 0);
+ }
+}
diff --git a/src/xnnpack/params-init.h b/src/xnnpack/params-init.h
index 4af6d04..bc05495 100644
--- a/src/xnnpack/params-init.h
+++ b/src/xnnpack/params-init.h
@@ -880,6 +880,11 @@
uint8_t output_max);
#endif // XNN_ARCH_WASMSIMD
+XNN_INTERNAL void xnn_init_qs8_f32_cvt_scalar_params(
+ union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
+ float scale,
+ int8_t zero_point);
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_INTERNAL void xnn_init_qs8_f32_cvt_neon_params(
union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
@@ -906,6 +911,11 @@
int8_t zero_point);
#endif // XNN_ARCH_WASMSIMD
+XNN_INTERNAL void xnn_init_qu8_f32_cvt_scalar_params(
+ union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
+ float scale,
+ uint8_t zero_point);
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_INTERNAL void xnn_init_qu8_f32_cvt_neon_params(
union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
diff --git a/src/xnnpack/vcvt.h b/src/xnnpack/vcvt.h
index f8c7704..4df995b 100644
--- a/src/xnnpack/vcvt.h
+++ b/src/xnnpack/vcvt.h
@@ -290,6 +290,11 @@
DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24)
DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x1)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x2)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x3)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x4)
+
#define DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
@@ -318,6 +323,11 @@
DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24)
DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x1)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x2)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x3)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x4)
+
#ifdef __cplusplus
} // extern "C"
diff --git a/test/qs8-f32-vcvt.cc b/test/qs8-f32-vcvt.cc
index ebc951a..fb1ec9b 100644
--- a/test/qs8-f32-vcvt.cc
+++ b/test/qs8-f32-vcvt.cc
@@ -935,3 +935,191 @@
}
}
#endif // XNN_ARCH_WASMSIMD
+
+
+TEST(QS8_F32_VCVT__SCALAR_X1, batch_eq_1) {
+ VCvtMicrokernelTester()
+ .batch_size(1)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, batch_gt_1) {
+ for (size_t batch_size = 2; batch_size < 10; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, scale) {
+ for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_eq_2) {
+ VCvtMicrokernelTester()
+ .batch_size(2)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_div_2) {
+ for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_lt_2) {
+ for (size_t batch_size = 1; batch_size < 2; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_gt_2) {
+ for (size_t batch_size = 3; batch_size < 4; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, scale) {
+ for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_eq_3) {
+ VCvtMicrokernelTester()
+ .batch_size(3)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_div_3) {
+ for (size_t batch_size = 6; batch_size < 30; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_lt_3) {
+ for (size_t batch_size = 1; batch_size < 3; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_gt_3) {
+ for (size_t batch_size = 4; batch_size < 6; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, scale) {
+ for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_eq_4) {
+ VCvtMicrokernelTester()
+ .batch_size(4)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_div_4) {
+ for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_lt_4) {
+ for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_gt_4) {
+ for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, scale) {
+ for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+ }
+ }
+}
diff --git a/test/qs8-f32-vcvt.yaml b/test/qs8-f32-vcvt.yaml
index 2705024..c47f8e8 100644
--- a/test/qs8-f32-vcvt.yaml
+++ b/test/qs8-f32-vcvt.yaml
@@ -35,3 +35,11 @@
init: xnn_init_qs8_f32_cvt_wasmsimd_params
- name: xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32
init: xnn_init_qs8_f32_cvt_wasmsimd_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x1
+ init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x2
+ init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x3
+ init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x4
+ init: xnn_init_qs8_f32_cvt_scalar_params
diff --git a/test/qu8-f32-vcvt.cc b/test/qu8-f32-vcvt.cc
index d9a8ebc..94e6ffc 100644
--- a/test/qu8-f32-vcvt.cc
+++ b/test/qu8-f32-vcvt.cc
@@ -935,3 +935,191 @@
}
}
#endif // XNN_ARCH_WASMSIMD
+
+
+TEST(QU8_F32_VCVT__SCALAR_X1, batch_eq_1) {
+ VCvtMicrokernelTester()
+ .batch_size(1)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, batch_gt_1) {
+ for (size_t batch_size = 2; batch_size < 10; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, scale) {
+ for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_eq_2) {
+ VCvtMicrokernelTester()
+ .batch_size(2)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_div_2) {
+ for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_lt_2) {
+ for (size_t batch_size = 1; batch_size < 2; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_gt_2) {
+ for (size_t batch_size = 3; batch_size < 4; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, scale) {
+ for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_eq_3) {
+ VCvtMicrokernelTester()
+ .batch_size(3)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_div_3) {
+ for (size_t batch_size = 6; batch_size < 30; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_lt_3) {
+ for (size_t batch_size = 1; batch_size < 3; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_gt_3) {
+ for (size_t batch_size = 4; batch_size < 6; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, scale) {
+ for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+ }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_eq_4) {
+ VCvtMicrokernelTester()
+ .batch_size(4)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_div_4) {
+ for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_lt_4) {
+ for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_gt_4) {
+ for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, scale) {
+ for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .scale(50)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, zero_point) {
+ for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+ for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+ VCvtMicrokernelTester()
+ .batch_size(batch_size)
+ .zero_point(zero_point)
+ .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+ }
+ }
+}
diff --git a/test/qu8-f32-vcvt.yaml b/test/qu8-f32-vcvt.yaml
index e9ecc07..c7ffed6 100644
--- a/test/qu8-f32-vcvt.yaml
+++ b/test/qu8-f32-vcvt.yaml
@@ -35,3 +35,11 @@
init: xnn_init_qu8_f32_cvt_wasmsimd_params
- name: xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32
init: xnn_init_qu8_f32_cvt_wasmsimd_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x1
+ init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x2
+ init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x3
+ init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x4
+ init: xnn_init_qu8_f32_cvt_scalar_params