Scalar QS8/QU8 -> F32 VCVT microkernels

PiperOrigin-RevId: 415466058
diff --git a/BUILD.bazel b/BUILD.bazel
index 7bcb674..339df12 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -782,6 +782,10 @@
     "src/qs8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c",
     "src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c",
     "src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c",
+    "src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c",
+    "src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c",
+    "src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c",
+    "src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c",
     "src/qs8-gavgpool/gen/7p7x-minmax-scalar-c1.c",
     "src/qs8-gavgpool/gen/7p7x-minmax-scalar-c2.c",
     "src/qs8-gavgpool/gen/7p7x-minmax-scalar-c4.c",
@@ -869,6 +873,10 @@
     "src/qu8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c",
     "src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c",
     "src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c",
+    "src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c",
+    "src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c",
+    "src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c",
+    "src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c",
     "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
     "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-lrint.c",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1153494..dd608b1 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -912,6 +912,10 @@
   src/qs8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c
   src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c
   src/qs8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c
+  src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
+  src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
+  src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
+  src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
   src/qs8-gavgpool/gen/7p7x-minmax-scalar-c1.c
   src/qs8-gavgpool/gen/7p7x-minmax-scalar-c2.c
   src/qs8-gavgpool/gen/7p7x-minmax-scalar-c4.c
@@ -999,6 +1003,10 @@
   src/qu8-dwconv/gen/up4x9-minmax-fp32-scalar-magic.c
   src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-lrint.c
   src/qu8-dwconv/gen/up4x25-minmax-fp32-scalar-magic.c
+  src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
+  src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
+  src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
+  src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
   src/qu8-gavgpool/7p7x-minmax-scalar-c1.c
   src/qu8-gavgpool/7x-minmax-scalar-c1.c
   src/qu8-gemm/gen/1x2-minmax-fp32-scalar-lrint.c
diff --git a/scripts/generate-qs8-f32-vcvt.sh b/scripts/generate-qs8-f32-vcvt.sh
index 3d1cb38..e6e8ffe 100755
--- a/scripts/generate-qs8-f32-vcvt.sh
+++ b/scripts/generate-qs8-f32-vcvt.sh
@@ -47,6 +47,17 @@
 tools/xngen src/qs8-f32-vcvt/wasmsimd.c.in -D BATCH_TILE=24 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x24.c &
 tools/xngen src/qs8-f32-vcvt/wasmsimd.c.in -D BATCH_TILE=32 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x32.c &
 
+#################################### Scalar ###################################
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c &
+
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=1 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=2 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=3 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c &
+tools/xngen src/qs8-f32-vcvt/scalar.c.in -D BATCH_TILE=4 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c &
+
 ################################## Unit tests #################################
 tools/generate-vcvt-test.py --spec test/qs8-f32-vcvt.yaml --output test/qs8-f32-vcvt.cc &
 tools/generate-vcvt-test.py --spec test/qu8-f32-vcvt.yaml --output test/qu8-f32-vcvt.cc &
diff --git a/src/params-init.c b/src/params-init.c
index 8eaba10..9c77f97 100644
--- a/src/params-init.c
+++ b/src/params-init.c
@@ -2998,6 +2998,15 @@
 }
 #endif  // XNN_ARCH_WASMSIMD
 
+XNN_INTERNAL void xnn_init_qs8_f32_cvt_scalar_params(  // Fills the `scalar` member of the QS8->F32 conversion parameter union.
+  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // output: only params->scalar is written here
+  float scale,  // stored unchanged in params->scalar.scale
+  int8_t zero_point)  // stored in params->scalar.zero_point after conversion to int32_t
+{
+  params->scalar.zero_point = (int32_t) zero_point;  // signed 8-bit value converted to int32_t before the store
+  params->scalar.scale = scale;
+}
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
 XNN_INTERNAL void xnn_init_qs8_f32_cvt_neon_params(
   union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
@@ -3055,6 +3064,16 @@
   }
 }
 #endif  // XNN_ARCH_WASMSIMD
+
+XNN_INTERNAL void xnn_init_qu8_f32_cvt_scalar_params(  // Fills the `scalar` member of the QU8->F32 conversion parameter union.
+  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // output: only params->scalar is written here
+  float scale,  // stored unchanged in params->scalar.scale
+  uint8_t zero_point)  // stored in params->scalar.zero_point after conversion to int32_t
+{
+  params->scalar.zero_point = (int32_t) zero_point;  // unsigned 8-bit value converted to int32_t, so the stored value is never negative
+  params->scalar.scale = scale;
+}
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
 XNN_INTERNAL void xnn_init_qu8_f32_cvt_neon_params(
   union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
new file mode 100644
index 0000000..bcc78d2
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
@@ -0,0 +1,41 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x1(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, one element per iteration.
+    size_t n,  // input size in bytes; reduced by sizeof(int8_t) for each element processed
+    const int8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);  // the do-while loop below always executes at least once
+  assert(n % sizeof(int8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loop
+  const float vscale = params->scalar.scale;
+
+  do {
+    int32_t vx = *x++;  // the int8_t input is converted to int32_t on load
+    vx -= vzero_point;  // zero point is subtracted in 32-bit integer arithmetic
+
+    float vy = (float) vx;
+    vy *= vscale;  // scaling is applied in float, after the integer subtraction
+    *y++ = vy;
+
+    n -= sizeof(int8_t);
+  } while (n != 0);
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
new file mode 100644
index 0000000..7e55bc1
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x2.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x2(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, two elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 2 * sizeof(int8_t) per main-loop iteration
+    const int8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(int8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 2 * sizeof(int8_t); n -= 2 * sizeof(int8_t)) {  // main loop: runs while at least two elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    x += 2;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit integer arithmetic
+    vx1 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y += 2;
+  }
+  if XNN_UNLIKELY(n != 0) {  // n is below 2 * sizeof(int8_t) here, so a single element is left
+    int32_t vx = *x;  // last element: neither pointer is advanced afterwards
+    vx -= vzero_point;
+
+    float vy = (float) vx;
+    vy *= vscale;
+    *y = vy;
+  }
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
new file mode 100644
index 0000000..191530c
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x3.c
@@ -0,0 +1,66 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x3(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, three elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 3 * sizeof(int8_t) per main-loop iteration
+    const int8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(int8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 3 * sizeof(int8_t); n -= 3 * sizeof(int8_t)) {  // main loop: runs while at least three elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    int32_t vx2 = (int32_t) x[2];
+    x += 3;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit integer arithmetic
+    vx1 -= vzero_point;
+    vx2 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+    float vy2 = (float) vx2;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+    vy2 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y[2] = vy2;
+    y += 3;
+  }
+  if XNN_UNLIKELY(n != 0) {  // remainder: fewer than three elements are left after the main loop
+    do {
+      int32_t vx = *x++;  // leftover elements are handled one at a time
+      vx -= vzero_point;
+
+      float vy = (float) vx;
+      vy *= vscale;
+      *y++ = vy;
+
+      n -= sizeof(int8_t);
+    } while (n != 0);
+  }
+}
diff --git a/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c b/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
new file mode 100644
index 0000000..99f5b81
--- /dev/null
+++ b/src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qs8_f32_vcvt_ukernel__scalar_x4(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, four elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 4 * sizeof(int8_t) per main-loop iteration
+    const int8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(int8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 4 * sizeof(int8_t); n -= 4 * sizeof(int8_t)) {  // main loop: runs while at least four elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    int32_t vx2 = (int32_t) x[2];
+    int32_t vx3 = (int32_t) x[3];
+    x += 4;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit integer arithmetic
+    vx1 -= vzero_point;
+    vx2 -= vzero_point;
+    vx3 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+    float vy2 = (float) vx2;
+    float vy3 = (float) vx3;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+    vy2 *= vscale;
+    vy3 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y[2] = vy2;
+    y[3] = vy3;
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // remainder: fewer than four elements are left after the main loop
+    do {
+      int32_t vx = *x++;  // leftover elements are handled one at a time
+      vx -= vzero_point;
+
+      float vy = (float) vx;
+      vy *= vscale;
+      *y++ = vy;
+
+      n -= sizeof(int8_t);
+    } while (n != 0);
+  }
+}
diff --git a/src/qs8-f32-vcvt/scalar.c.in b/src/qs8-f32-vcvt/scalar.c.in
new file mode 100644
index 0000000..173ee1f
--- /dev/null
+++ b/src/qs8-f32-vcvt/scalar.c.in
@@ -0,0 +1,81 @@
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert BATCH_TILE >= 1
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE]
+void xnn_${DATATYPE.lower()}_f32_vcvt_ukernel__scalar_x${BATCH_TILE}(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element.
+    size_t n,  // input size in bytes; reduced by the element size as elements are consumed
+    const ${XINT8_T}* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_${DATATYPE.lower()}_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(${XINT8_T}) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before any loop
+  const float vscale = params->scalar.scale;
+
+  $if BATCH_TILE > 1:
+    for (; n >= ${BATCH_TILE} * sizeof(${XINT8_T}); n -= ${BATCH_TILE} * sizeof(${XINT8_T})) {  // main loop: runs while a full group of elements remains
+      $for N in range(BATCH_TILE):
+        int32_t vx${N} = (int32_t) x[${N}];
+      x += ${BATCH_TILE};  // advance past the group that was just loaded
+
+      $for N in range(BATCH_TILE):
+        vx${N} -= vzero_point;
+
+      $for N in range(BATCH_TILE):
+        float vy${N} = (float) vx${N};
+
+      $for N in range(BATCH_TILE):
+        vy${N} *= vscale;
+
+      $for N in range(BATCH_TILE):
+        y[${N}] = vy${N};
+      y += ${BATCH_TILE};  // advance past the group that was just stored
+    }
+  $if BATCH_TILE == 1:
+    do {
+      int32_t vx = *x++;  // the 8-bit input is converted to int32_t on load
+      vx -= vzero_point;  // zero point is subtracted in 32-bit integer arithmetic
+
+      float vy = (float) vx;
+      vy *= vscale;  // scaling is applied in float, after the integer subtraction
+      *y++ = vy;
+
+      n -= sizeof(${XINT8_T});
+    } while (n != 0);
+  $elif BATCH_TILE == 2:
+    if XNN_UNLIKELY(n != 0) {  // the main loop leaves less than two elements, so a single element is left
+      int32_t vx = *x;  // last element: neither pointer is advanced afterwards
+      vx -= vzero_point;
+
+      float vy = (float) vx;
+      vy *= vscale;
+      *y = vy;
+    }
+  $else:
+    if XNN_UNLIKELY(n != 0) {  // remainder: less than one full group is left after the main loop
+      do {
+        int32_t vx = *x++;  // leftover elements are handled one at a time
+        vx -= vzero_point;
+
+        float vy = (float) vx;
+        vy *= vscale;
+        *y++ = vy;
+
+        n -= sizeof(${XINT8_T});
+      } while (n != 0);
+    }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
new file mode 100644
index 0000000..9923dfd
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
@@ -0,0 +1,41 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x1(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, one element per iteration.
+    size_t n,  // input size in bytes; reduced by sizeof(uint8_t) for each element processed
+    const uint8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);  // the do-while loop below always executes at least once
+  assert(n % sizeof(uint8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loop
+  const float vscale = params->scalar.scale;
+
+  do {
+    int32_t vx = *x++;  // the uint8_t input is converted to int32_t on load, so vx starts non-negative
+    vx -= vzero_point;  // zero point is subtracted in 32-bit signed arithmetic
+
+    float vy = (float) vx;
+    vy *= vscale;  // scaling is applied in float, after the integer subtraction
+    *y++ = vy;
+
+    n -= sizeof(uint8_t);
+  } while (n != 0);
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
new file mode 100644
index 0000000..e7d269e
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x2(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, two elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 2 * sizeof(uint8_t) per main-loop iteration
+    const uint8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(uint8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 2 * sizeof(uint8_t); n -= 2 * sizeof(uint8_t)) {  // main loop: runs while at least two elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    x += 2;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit signed arithmetic
+    vx1 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y += 2;
+  }
+  if XNN_UNLIKELY(n != 0) {  // n is below 2 * sizeof(uint8_t) here, so a single element is left
+    int32_t vx = *x;  // last element: neither pointer is advanced afterwards
+    vx -= vzero_point;
+
+    float vy = (float) vx;
+    vy *= vscale;
+    *y = vy;
+  }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
new file mode 100644
index 0000000..2cf908b
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c
@@ -0,0 +1,66 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x3(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, three elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 3 * sizeof(uint8_t) per main-loop iteration
+    const uint8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(uint8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 3 * sizeof(uint8_t); n -= 3 * sizeof(uint8_t)) {  // main loop: runs while at least three elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    int32_t vx2 = (int32_t) x[2];
+    x += 3;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit signed arithmetic
+    vx1 -= vzero_point;
+    vx2 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+    float vy2 = (float) vx2;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+    vy2 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y[2] = vy2;
+    y += 3;
+  }
+  if XNN_UNLIKELY(n != 0) {  // remainder: fewer than three elements are left after the main loop
+    do {
+      int32_t vx = *x++;  // leftover elements are handled one at a time
+      vx -= vzero_point;
+
+      float vy = (float) vx;
+      vy *= vscale;
+      *y++ = vy;
+
+      n -= sizeof(uint8_t);
+    } while (n != 0);
+  }
+}
diff --git a/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c b/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
new file mode 100644
index 0000000..c665670
--- /dev/null
+++ b/src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-f32-vcvt/scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2021 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vcvt.h>
+
+
+void xnn_qu8_f32_vcvt_ukernel__scalar_x4(  // Writes y[i] = (float) (x[i] - zero_point) * scale for every input element, four elements per main-loop iteration.
+    size_t n,  // input size in bytes; reduced by 4 * sizeof(uint8_t) per main-loop iteration
+    const uint8_t* x,  // input elements, read sequentially
+    float* y,  // output buffer; one float is stored per input element
+    const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only params->scalar.zero_point and params->scalar.scale are read
+{
+  assert(n != 0);
+  assert(n % sizeof(uint8_t) == 0);
+  assert(x != NULL);
+  assert(y != NULL);
+
+  const int32_t vzero_point = params->scalar.zero_point;  // both parameters are loaded once, before the loops
+  const float vscale = params->scalar.scale;
+
+  for (; n >= 4 * sizeof(uint8_t); n -= 4 * sizeof(uint8_t)) {  // main loop: runs while at least four elements remain
+    int32_t vx0 = (int32_t) x[0];
+    int32_t vx1 = (int32_t) x[1];
+    int32_t vx2 = (int32_t) x[2];
+    int32_t vx3 = (int32_t) x[3];
+    x += 4;
+
+    vx0 -= vzero_point;  // zero point is subtracted in 32-bit signed arithmetic
+    vx1 -= vzero_point;
+    vx2 -= vzero_point;
+    vx3 -= vzero_point;
+
+    float vy0 = (float) vx0;
+    float vy1 = (float) vx1;
+    float vy2 = (float) vx2;
+    float vy3 = (float) vx3;
+
+    vy0 *= vscale;  // scaling is applied in float, after the integer subtraction
+    vy1 *= vscale;
+    vy2 *= vscale;
+    vy3 *= vscale;
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y[2] = vy2;
+    y[3] = vy3;
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // remainder: fewer than four elements are left after the main loop
+    do {
+      int32_t vx = *x++;  // leftover elements are handled one at a time
+      vx -= vzero_point;
+
+      float vy = (float) vx;
+      vy *= vscale;
+      *y++ = vy;
+
+      n -= sizeof(uint8_t);
+    } while (n != 0);
+  }
+}
diff --git a/src/xnnpack/params-init.h b/src/xnnpack/params-init.h
index 4af6d04..bc05495 100644
--- a/src/xnnpack/params-init.h
+++ b/src/xnnpack/params-init.h
@@ -880,6 +880,11 @@
   uint8_t output_max);
 #endif  // XNN_ARCH_WASMSIMD
 
+XNN_INTERNAL void xnn_init_qs8_f32_cvt_scalar_params(  // Initializer for the scalar QS8->F32 conversion parameters; declared outside any XNN_ARCH_* guard.
+  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // parameter union to initialize
+  float scale,
+  int8_t zero_point);
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
 XNN_INTERNAL void xnn_init_qs8_f32_cvt_neon_params(
   union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
@@ -906,6 +911,11 @@
   int8_t zero_point);
 #endif  // XNN_ARCH_WASMSIMD
 
+XNN_INTERNAL void xnn_init_qu8_f32_cvt_scalar_params(  // Initializer for the scalar QU8->F32 conversion parameters; declared outside any XNN_ARCH_* guard.
+  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],  // parameter union to initialize
+  float scale,
+  uint8_t zero_point);
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
 XNN_INTERNAL void xnn_init_qu8_f32_cvt_neon_params(
   union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
diff --git a/src/xnnpack/vcvt.h b/src/xnnpack/vcvt.h
index f8c7704..4df995b 100644
--- a/src/xnnpack/vcvt.h
+++ b/src/xnnpack/vcvt.h
@@ -290,6 +290,11 @@
 DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24)
 DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32)
 
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x1)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x2)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x3)
+DECLARE_QS8_F32_VCVT_UKERNEL_FUNCTION(xnn_qs8_f32_vcvt_ukernel__scalar_x4)
+
 
 #define DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(fn_name) \
   XNN_INTERNAL void fn_name(                           \
@@ -318,6 +323,11 @@
 DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24)
 DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32)
 
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x1)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x2)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x3)
+DECLARE_QU8_F32_VCVT_UKERNEL_FUNCTION(xnn_qu8_f32_vcvt_ukernel__scalar_x4)
+
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/test/qs8-f32-vcvt.cc b/test/qs8-f32-vcvt.cc
index ebc951a..fb1ec9b 100644
--- a/test/qs8-f32-vcvt.cc
+++ b/test/qs8-f32-vcvt.cc
@@ -935,3 +935,191 @@
     }
   }
 #endif  // XNN_ARCH_WASMSIMD
+
+
+TEST(QS8_F32_VCVT__SCALAR_X1, batch_eq_1) {  // one run with batch_size 1
+  VCvtMicrokernelTester()
+    .batch_size(1)
+    .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, batch_gt_1) {  // batch sizes 2 through 9
+  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, scale) {  // batch sizes 1 through 5 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X1, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1 through 5
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x1, xnn_init_qs8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_eq_2) {  // one run with batch_size 2
+  VCvtMicrokernelTester()
+    .batch_size(2)
+    .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_div_2) {  // even batch sizes 4 through 18
+  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_lt_2) {  // the loop bounds yield only batch_size 1
+  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, batch_gt_2) {  // the loop bounds yield only batch_size 3
+  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, scale) {  // batch sizes 1 through 10 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X2, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1 through 10
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x2, xnn_init_qs8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_eq_3) {  // one run with batch_size 3
+  VCvtMicrokernelTester()
+    .batch_size(3)
+    .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_div_3) {  // multiples of 3 from 6 through 27
+  for (size_t batch_size = 6; batch_size < 30; batch_size += 3) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_lt_3) {  // batch sizes 1 and 2
+  for (size_t batch_size = 1; batch_size < 3; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, batch_gt_3) {  // batch sizes 4 and 5
+  for (size_t batch_size = 4; batch_size < 6; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, scale) {  // odd batch sizes 1 through 15 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X3, zero_point) {  // zero points 0, 2 and 4, each with odd batch sizes 1 through 15
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x3, xnn_init_qs8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_eq_4) {  // one run with batch_size 4
+  VCvtMicrokernelTester()
+    .batch_size(4)
+    .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_div_4) {  // multiples of 4 from 8 through 36
+  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_lt_4) {  // batch sizes 1 through 3
+  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, batch_gt_4) {  // batch sizes 5 through 7
+  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, scale) {  // batch sizes 1, 4, 7, ..., 19 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QS8_F32_VCVT__SCALAR_X4, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1, 4, 7, ..., 19
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qs8_f32_vcvt_ukernel__scalar_x4, xnn_init_qs8_f32_cvt_scalar_params);
+    }
+  }
+}
diff --git a/test/qs8-f32-vcvt.yaml b/test/qs8-f32-vcvt.yaml
index 2705024..c47f8e8 100644
--- a/test/qs8-f32-vcvt.yaml
+++ b/test/qs8-f32-vcvt.yaml
@@ -35,3 +35,11 @@
   init: xnn_init_qs8_f32_cvt_wasmsimd_params
 - name: xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32
   init: xnn_init_qs8_f32_cvt_wasmsimd_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x1
+  init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x2
+  init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x3
+  init: xnn_init_qs8_f32_cvt_scalar_params
+- name: xnn_qs8_f32_vcvt_ukernel__scalar_x4
+  init: xnn_init_qs8_f32_cvt_scalar_params
diff --git a/test/qu8-f32-vcvt.cc b/test/qu8-f32-vcvt.cc
index d9a8ebc..94e6ffc 100644
--- a/test/qu8-f32-vcvt.cc
+++ b/test/qu8-f32-vcvt.cc
@@ -935,3 +935,191 @@
     }
   }
 #endif  // XNN_ARCH_WASMSIMD
+
+
+TEST(QU8_F32_VCVT__SCALAR_X1, batch_eq_1) {  // one run with batch_size 1
+  VCvtMicrokernelTester()
+    .batch_size(1)
+    .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, batch_gt_1) {  // batch sizes 2 through 9
+  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, scale) {  // batch sizes 1 through 5 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X1, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1 through 5
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x1, xnn_init_qu8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_eq_2) {  // one run with batch_size 2
+  VCvtMicrokernelTester()
+    .batch_size(2)
+    .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_div_2) {  // even batch sizes 4 through 18
+  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_lt_2) {  // the loop bounds yield only batch_size 1
+  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, batch_gt_2) {  // the loop bounds yield only batch_size 3
+  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, scale) {  // batch sizes 1 through 10 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X2, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1 through 10
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x2, xnn_init_qu8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_eq_3) {  // one run with batch_size 3
+  VCvtMicrokernelTester()
+    .batch_size(3)
+    .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_div_3) {  // multiples of 3 from 6 through 27
+  for (size_t batch_size = 6; batch_size < 30; batch_size += 3) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_lt_3) {  // batch sizes 1 and 2
+  for (size_t batch_size = 1; batch_size < 3; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, batch_gt_3) {  // batch sizes 4 and 5
+  for (size_t batch_size = 4; batch_size < 6; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, scale) {  // odd batch sizes 1 through 15 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X3, zero_point) {  // zero points 0, 2 and 4, each with odd batch sizes 1 through 15
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 15; batch_size += 2) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x3, xnn_init_qu8_f32_cvt_scalar_params);
+    }
+  }
+}
+
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_eq_4) {  // one run with batch_size 4
+  VCvtMicrokernelTester()
+    .batch_size(4)
+    .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_div_4) {  // multiples of 4 from 8 through 36
+  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_lt_4) {  // batch sizes 1 through 3
+  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, batch_gt_4) {  // batch sizes 5 through 7
+  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, scale) {  // batch sizes 1, 4, 7, ..., 19 with the tester's scale set to 50
+  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+    VCvtMicrokernelTester()
+      .batch_size(batch_size)
+      .scale(50)
+      .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+  }
+}
+
+TEST(QU8_F32_VCVT__SCALAR_X4, zero_point) {  // zero points 0, 2 and 4, each with batch sizes 1, 4, 7, ..., 19
+  for (int16_t zero_point = 0; zero_point < 5; zero_point += 2) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VCvtMicrokernelTester()
+        .batch_size(batch_size)
+        .zero_point(zero_point)
+        .Test(xnn_qu8_f32_vcvt_ukernel__scalar_x4, xnn_init_qu8_f32_cvt_scalar_params);
+    }
+  }
+}
diff --git a/test/qu8-f32-vcvt.yaml b/test/qu8-f32-vcvt.yaml
index e9ecc07..c7ffed6 100644
--- a/test/qu8-f32-vcvt.yaml
+++ b/test/qu8-f32-vcvt.yaml
@@ -35,3 +35,11 @@
   init: xnn_init_qu8_f32_cvt_wasmsimd_params
 - name: xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32
   init: xnn_init_qu8_f32_cvt_wasmsimd_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x1
+  init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x2
+  init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x3
+  init: xnn_init_qu8_f32_cvt_scalar_params
+- name: xnn_qu8_f32_vcvt_ukernel__scalar_x4
+  init: xnn_init_qu8_f32_cvt_scalar_params